Granite 3.3 8B LoRA Math PRM

8.0B params · 1 language · License: Apache 2.0
by ibm-granite
Language Model · New · Early-stage · 0 downloads
Edge AI: Mobile · Laptop · Server (18GB+ RAM)
Quick Summary

A LoRA-based process reward model (PRM) built on Granite 3.3 8B for mathematical reasoning: given a question and a step-by-step solution, it scores the probability that each intermediate step is correct (see the code example below).

Device Compatibility

Mobile: 4-6GB RAM
Laptop: 16GB RAM
Server: GPU
Minimum recommended: 8GB+ RAM
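
The figures above are indicative for an 8B-parameter model. One common way to reduce the memory footprint (a sketch using standard transformers loading options, not something taken from this model card) is to load the weights in bfloat16, which roughly halves the requirement relative to float32:

import torch
from transformers import AutoModelForCausalLM

# Assumption: loading the PRM in bfloat16 to cut weight memory roughly in half
model = AutoModelForCausalLM.from_pretrained(
    "ibm-granite/granite-3.3-8b-lora-math-prm",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)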

Code Examples

Get step positions and rewards (Python, transformers)

The snippet below formats the question and the candidate solution steps as a multi-turn chat, locates the positions of the assistant "Y" turns, and reads the probability the model assigns to "Y" after each step as that step's reward.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content': query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content': step + " " + generation_prompt}

        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})

    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt=False, tokenize=False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # tokenize a full assistant turn that answers with the correct token, e.g.
    # "<|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>", and keep the tokens
    # that come before the correct token so assistant turns can be matched below
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens=False, return_tensors="pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # keep only the sequence positions where correct_token_id appears as the assistant
    # turn's "Y", not just any occurrence of the same token id elsewhere in the prompt
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token):t_idx].tolist() == asst_toks_before_correct_token:
            # the logits at position i predict token i+1, so the score for the "Y" at
            # position t_idx is read from the logits of the PREVIOUS position
            prm_indices.append(t_idx - 1)

    assert len(prm_indices) > 0, "no assistant step markers found in the input"
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

# convert the logits at each step position to probabilities; the probability of the
# correct token ("Y") is taken as the reward for that step
step_rewards = []
for prm_idx in prm_indices:
    probs = torch.softmax(logits[0, prm_idx, :], dim=-1)
    step_rewards.append(probs[correct_token_id].item())

print(step_rewards)
# [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
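
In the example output, the low score on the sixth step (≈0.52) flags the incorrect algebra in that step of the sampled solution. When the PRM is used to rerank several candidate solutions (for example in a best-of-N setup), the per-step rewards are usually collapsed into a single trajectory score. The aggregation below (minimum over steps, with product as an alternative) is a common convention rather than something prescribed by this model card, and the function name is illustrative:

def aggregate_step_rewards(step_rewards, how="min"):
    # Collapse per-step PRM scores into one trajectory-level score.
    # "min" lets a single bad step dominate; "prod" compounds uncertainty across steps.
    if how == "min":
        return min(step_rewards)
    if how == "prod":
        score = 1.0
        for r in step_rewards:
            score *= r
        return score
    raise ValueError(f"unknown aggregation: {how}")

print(aggregate_step_rewards(step_rewards))          # ~0.52, dominated by the flawed sixth step
print(aggregate_step_rewards(step_rewards, "prod"))  # ~0.49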
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
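
The rewards above score each reasoning step independently. To rank whole candidate solutions (for example, in a best-of-N setup) you need a single score per response; the model card does not fix an aggregation rule, so the snippet below is only a minimal sketch of some common choices, with the minimum-step reward as the default.

# minimal sketch (assumption: the aggregation strategy is up to the user; it is not prescribed by the model card)
def aggregate_step_rewards(step_rewards, method="min"):
    """Collapse per-step rewards into one solution-level score."""
    if method == "min":
        # a chain of reasoning is only as reliable as its weakest step
        return min(step_rewards)
    if method == "mean":
        return sum(step_rewards) / len(step_rewards)
    if method == "prod":
        score = 1.0
        for r in step_rewards:
            score *= r
        return score
    raise ValueError(f"unknown aggregation method: {method}")

solution_score = aggregate_step_rewards(step_rewards, method="min")
print(solution_score)
# with the rewards printed above this is ~0.52, dominated by the questionable sixth step

Min-aggregation lets a single weak step cap the whole solution score, which is usually the desired behaviour when reranking sampled solutions; mean or product aggregation are softer alternatives.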
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

# convert the logits at each step position to probabilities; the probability of the correct token is that step's reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
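
When comparing several candidate solutions (for example, in best-of-n sampling), the per-step rewards are usually collapsed into a single response-level score. The helper below is a minimal, hypothetical sketch, not part of the model card: it assumes the step_rewards list produced above and shows a few common aggregation choices.

from typing import List

def aggregate_step_rewards(step_rewards: List[float], method: str = "min") -> float:
    # Illustrative helper (assumption, not an API of this model): collapse
    # per-step rewards into one response-level score.
    if method == "min":
        # score the response by its weakest step
        return min(step_rewards)
    if method == "prod":
        # multiply step probabilities; penalizes chains with several shaky steps
        score = 1.0
        for r in step_rewards:
            score *= r
        return score
    if method == "last":
        # use only the final step's reward
        return step_rewards[-1]
    raise ValueError(f"unknown aggregation method: {method}")

print(aggregate_step_rewards(step_rewards, method="min"))
# with the rewards printed above, this is ~0.5195 (the weakest step, step 6)

With "min" aggregation, the example response is scored by its sixth step, which the PRM flags as the least reliable one.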
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
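
The per-step rewards are typically collapsed into a single solution-level score when reranking candidate solutions (e.g. best-of-N sampling). The snippet below is a minimal sketch that reuses the step_rewards list computed above; the min / product / last-step aggregations are common PRM conventions shown for illustration and are not prescribed by this model card.

# aggregate step rewards into one score per solution (illustrative choices, not mandated by the model)
from math import prod

min_reward = min(step_rewards)      # a single weak step caps the whole solution
prod_reward = prod(step_rewards)    # treats step rewards like independent probabilities
last_reward = step_rewards[-1]      # reward of the final step only

print(f"min: {min_reward:.4f}, product: {prod_reward:.4f}, last: {last_reward:.4f}")
# with the rewards printed above, min is ~0.5195, flagging the sixth step ("Adding -$0.85 ...") as the weakest link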
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
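The printed values are per-step rewards: for each reasoning step, the probability the PRM assigns to the "Y" token, i.e. its confidence that the solution is still correct up to that step (note the dip to roughly 0.52 at the sixth step). As a follow-up sketch that is not part of the official example, the helper below shows one common way to collapse these step rewards into a single response-level score for best-of-N reranking; the aggregation choices (min, product, mean) and the name aggregate_step_rewards are assumptions for illustration only.

from typing import List

def aggregate_step_rewards(step_rewards: List[float], method: str = "min") -> float:
    # Hypothetical helper (not provided by the model): collapse per-step PRM
    # rewards into one score per candidate response.
    if method == "min":
        # score the response by its weakest step
        return min(step_rewards)
    if method == "prod":
        # treat step rewards as independent probabilities and multiply them
        score = 1.0
        for r in step_rewards:
            score *= r
        return score
    if method == "mean":
        return sum(step_rewards) / len(step_rewards)
    raise ValueError(f"unknown aggregation method: {method}")

# Using the step_rewards computed in the example above: the low sixth-step
# score (~0.52) dominates the "min" aggregate, flagging that step as the
# least trusted part of the solution.
print(aggregate_step_rewards(step_rewards, method="min"))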
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
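
Each value in step_rewards is the model's confidence that the corresponding step of the response is correct so far. In the example above, the sixth step receives only about 0.52: adding $0.85 to both sides of $5.80 - x - $0.85 = $4 should give $5.80 - x = $4.85, not $3.15, and the PRM flags exactly that step while every other step scores above 0.94.

For reranking several sampled solutions, the per-step rewards are typically collapsed into a single score. The snippet below is a minimal sketch of two common aggregation choices, reusing the step_rewards list computed above; aggregate_step_rewards and its "min"/"prod" options are illustrative names for this sketch, not part of this model's code.

import math
from typing import List

def aggregate_step_rewards(step_rewards: List[float], how: str = "min") -> float:
    # collapse per-step PRM scores into one solution-level score
    if how == "min":
        # score of the weakest step: one bad step sinks the whole solution
        return min(step_rewards)
    if how == "prod":
        # product of step scores, i.e. the chance that every step is correct
        # under an independence assumption
        return math.prod(step_rewards)
    raise ValueError(f"unknown aggregation: {how}")

print(aggregate_step_rewards(step_rewards, "min"))   # ~0.52 for the example above
print(aggregate_step_rewards(step_rewards, "prod"))  # ~0.49 for the example above

A higher aggregated score can then be used to pick the best of several sampled responses.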
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
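The step-level rewards above can also be collapsed into a single score per candidate solution, which is useful when reranking several sampled generations (best-of-n). The helper below is a minimal sketch of that aggregation and is not part of the model card; `aggregate_rewards` and the choice of aggregation (minimum, product, or last step) are illustrative assumptions.

# Minimal sketch (illustrative, not from the model card): collapse per-step
# rewards into one response-level score, e.g. for best-of-n reranking.
def aggregate_rewards(step_rewards, how="min"):
    if how == "min":
        # score is bounded by the weakest step, a common PRM aggregation
        return min(step_rewards)
    if how == "prod":
        # product of the per-step probabilities
        score = 1.0
        for r in step_rewards:
            score *= r
        return score
    if how == "last":
        # probability assigned to the final step only
        return step_rewards[-1]
    raise ValueError(f"unknown aggregation: {how}")

# using the step_rewards list computed above; the weak step (~0.52) dominates under "min"
print(aggregate_rewards(step_rewards, how="min"))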
format the prompts (python, transformers)
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
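The generation-based usage returns the judgment as text. If a boolean or score is needed downstream, the decoded output can be post-processed; the snippet below is a minimal sketch that continues from the variables above and assumes the model answers with a leading "Y" or "N", as in the example output.

# Minimal sketch (assumption, continues from the snippet above): keep only the
# newly generated tokens and map the Y/N judgment to a boolean.
new_tokens = response[0][inputs["input_ids"].shape[1]:]
judgment = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
is_correct_so_far = judgment.upper().startswith("Y")
print(judgment, is_correct_so_far)
# e.g. Y True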
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>

Deploy This Model

Production-ready deployment in minutes.

Together.ai — instant API access to this model. Production-ready inference API; start free, scale to millions.

Replicate — one-click model deployment. Run models in the cloud with a simple API; no DevOps required.

Disclosure: We may earn a commission from these partners. This helps keep LLMYourWay free.