Commit 948cf314 authored by Monisha PJ

initial commit

.gitignore 0 → 100644
.vscode
__pycache__
myenv/
try1.py
\ No newline at end of file
cove_chains.py 0 → 100644
from langchain.chains.llm import LLMChain
from langchain.chains import SequentialChain
from langchain.prompts import PromptTemplate

import prompts
from execute_verification_chain import ExecuteVerificationChain

class WikiDataCategoryListCOVEChain(object):
    def __init__(self, llm):
        self.llm = llm

    def __call__(self):
        # Create baseline response chain
        baseline_response_prompt_template = PromptTemplate(
            input_variables=["original_question"],
            template=prompts.BASELINE_PROMPT_WIKI)
        baseline_response_chain = LLMChain(llm=self.llm,
                                           prompt=baseline_response_prompt_template,
                                           output_key="baseline_response")
        # Create plan verification chain
        ## Create plan verification template
        verification_question_template_prompt_template = PromptTemplate(
            input_variables=["original_question"],
            template=prompts.VERIFICATION_QUESTION_TEMPLATE_PROMPT_WIKI)
        verification_question_template_chain = LLMChain(llm=self.llm,
                                                        prompt=verification_question_template_prompt_template,
                                                        output_key="verification_question_template")
        ## Create plan verification questions
        verification_question_generation_prompt_template = PromptTemplate(
            input_variables=["original_question",
                             "baseline_response",
                             "verification_question_template"],
            template=prompts.VERIFICATION_QUESTION_PROMPT_WIKI)
        verification_question_generation_chain = LLMChain(llm=self.llm,
                                                          prompt=verification_question_generation_prompt_template,
                                                          output_key="verification_questions")
        # Create execution verification
        execute_verification_question_prompt_template = PromptTemplate(
            input_variables=["verification_questions"],
            template=prompts.EXECUTE_PLAN_PROMPT)
        execute_verification_question_chain = ExecuteVerificationChain(llm=self.llm,
                                                                       prompt=execute_verification_question_prompt_template,
                                                                       output_key="verification_answers")
        # Create final refined response
        final_answer_prompt_template = PromptTemplate(
            input_variables=["original_question",
                             "baseline_response",
                             "verification_answers"],
            template=prompts.FINAL_REFINED_PROMPT)
        final_answer_chain = LLMChain(llm=self.llm,
                                      prompt=final_answer_prompt_template,
                                      output_key="final_answer")
        # Create sequential chain
        wiki_data_category_list_cove_chain = SequentialChain(
            chains=[baseline_response_chain,
                    verification_question_template_chain,
                    verification_question_generation_chain,
                    execute_verification_question_chain,
                    final_answer_chain],
            input_variables=["original_question"],
            # Here we return multiple variables
            output_variables=["original_question",
                              "baseline_response",
                              "verification_question_template",
                              "verification_questions",
                              "verification_answers",
                              "final_answer"],
            verbose=False)
        return wiki_data_category_list_cove_chain
class MultiSpanCOVEChain(object):
    def __init__(self, llm):
        self.llm = llm

    def __call__(self):
        # Create baseline response chain
        baseline_response_prompt_template = PromptTemplate(
            input_variables=["original_question"],
            template=prompts.BASELINE_PROMPT_MULTI)
        baseline_response_chain = LLMChain(llm=self.llm,
                                           prompt=baseline_response_prompt_template,
                                           output_key="baseline_response")
        ## Create plan verification questions
        verification_question_generation_prompt_template = PromptTemplate(
            input_variables=["original_question",
                             "baseline_response"],
            template=prompts.VERIFICATION_QUESTION_PROMPT_MULTI)
        verification_question_generation_chain = LLMChain(llm=self.llm,
                                                          prompt=verification_question_generation_prompt_template,
                                                          output_key="verification_questions")
        # Create execution verification
        execute_verification_question_prompt_template = PromptTemplate(
            input_variables=["verification_questions"],
            template=prompts.EXECUTE_PLAN_PROMPT)
        execute_verification_question_chain = ExecuteVerificationChain(llm=self.llm,
                                                                       prompt=execute_verification_question_prompt_template,
                                                                       output_key="verification_answers")
        # Create final refined response
        final_answer_prompt_template = PromptTemplate(
            input_variables=["original_question",
                             "baseline_response",
                             "verification_answers"],
            template=prompts.FINAL_REFINED_PROMPT)
        final_answer_chain = LLMChain(llm=self.llm,
                                      prompt=final_answer_prompt_template,
                                      output_key="final_answer")
        # Create sequential chain
        multi_span_cove_chain = SequentialChain(
            chains=[baseline_response_chain,
                    verification_question_generation_chain,
                    execute_verification_question_chain,
                    final_answer_chain],
            input_variables=["original_question"],
            # Here we return multiple variables
            output_variables=["original_question",
                              "baseline_response",
                              "verification_questions",
                              "verification_answers",
                              "final_answer"],
            verbose=False)
        return multi_span_cove_chain
class LongFormCOVEChain(object):
    def __init__(self, llm):
        self.llm = llm

    def __call__(self):
        # Create baseline response chain
        baseline_response_prompt_template = PromptTemplate(
            input_variables=["original_question"],
            template=prompts.BASELINE_PROMPT_LONG)
        baseline_response_chain = LLMChain(llm=self.llm,
                                           prompt=baseline_response_prompt_template,
                                           output_key="baseline_response")
        ## Create plan verification questions
        verification_question_generation_prompt_template = PromptTemplate(
            input_variables=["original_question",
                             "baseline_response"],
            template=prompts.VERIFICATION_QUESTION_PROMPT_LONG)
        verification_question_generation_chain = LLMChain(llm=self.llm,
                                                          prompt=verification_question_generation_prompt_template,
                                                          output_key="verification_questions")
        # Create execution verification
        execute_verification_question_prompt_template = PromptTemplate(
            input_variables=["verification_questions"],
            template=prompts.EXECUTE_PLAN_PROMPT)
        execute_verification_question_chain = ExecuteVerificationChain(llm=self.llm,
                                                                       prompt=execute_verification_question_prompt_template,
                                                                       output_key="verification_answers")
        # Create final refined response
        final_answer_prompt_template = PromptTemplate(
            input_variables=["original_question",
                             "baseline_response",
                             "verification_answers"],
            template=prompts.FINAL_REFINED_PROMPT)
        final_answer_chain = LLMChain(llm=self.llm,
                                      prompt=final_answer_prompt_template,
                                      output_key="final_answer")
        # Create sequential chain
        long_form_cove_chain = SequentialChain(
            chains=[baseline_response_chain,
                    verification_question_generation_chain,
                    execute_verification_question_chain,
                    final_answer_chain],
            input_variables=["original_question"],
            # Here we return multiple variables
            output_variables=["original_question",
                              "baseline_response",
                              "verification_questions",
                              "verification_answers",
                              "final_answer"],
            verbose=False)
        return long_form_cove_chain
\ No newline at end of file
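A minimal usage sketch for these chains (not part of the commit): it assumes VertexAI credentials are already configured and network access for the DuckDuckGo search tool; the model name and question are illustrative.

from langchain.chat_models import ChatVertexAI
from cove_chains import WikiDataCategoryListCOVEChain

llm = ChatVertexAI(model_name="chat-bison@001", temperature=0.1, max_output_tokens=1024)
cove_chain = WikiDataCategoryListCOVEChain(llm)()  # __call__ assembles the SequentialChain
result = cove_chain({"original_question": "Name some endemic orchids of Vietnam."})
print(result["final_answer"])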
execute_verification_chain.py 0 → 100644
import itertools
from typing import Any, Dict, List, Optional

from pydantic import Extra
from langchain.schema.language_model import BaseLanguageModel
from langchain.callbacks.manager import (
    AsyncCallbackManagerForChainRun,
    CallbackManagerForChainRun,
)
from langchain.chains.base import Chain
from langchain.prompts import PromptTemplate
from langchain.prompts.base import BasePromptTemplate
from langchain.tools import DuckDuckGoSearchRun

import prompts

class ExecuteVerificationChain(Chain):
    """
    Implements the logic to execute the verification questions for factual accuracy
    """
    prompt: BasePromptTemplate
    llm: BaseLanguageModel
    input_key: str = "verification_questions"
    output_key: str = "verification_answers"
    use_search_tool: bool = True
    search_tool: Any = DuckDuckGoSearchRun()

    class Config:
        """Configuration for this pydantic object."""
        extra = Extra.forbid
        arbitrary_types_allowed = True

    @property
    def input_keys(self) -> List[str]:
        """Will be whatever keys the prompt expects.

        :meta private:
        """
        return [self.input_key]

    @property
    def output_keys(self) -> List[str]:
        """Will always return text key.

        :meta private:
        """
        return [self.output_key]

    def search_for_verification_question(self,
                                         verification_question: str
                                         ) -> str:
        search_result = self.search_tool.run(verification_question)
        return search_result

    def _call(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[CallbackManagerForChainRun] = None,
    ) -> Dict[str, str]:
        verification_answers_list = list()  # Will contain the answer to each verification question
        question_answer_pair = ""  # Final output of verification question and answer pairs
        # Convert all the verification questions into a list of strings
        sub_inputs = {k: v for k, v in inputs.items() if k == self.input_key}
        verification_questions_prompt_value = self.prompt.format_prompt(**sub_inputs)
        verification_questions_str = verification_questions_prompt_value.text
        verification_questions_list = verification_questions_str.split("\n")
        # Set up prompts for both the search tool and llm self-evaluation
        execution_prompt_search_tool = PromptTemplate.from_template(prompts.EXECUTE_PLAN_PROMPT_SEARCH_TOOL)
        execution_prompt_self_llm = PromptTemplate.from_template(prompts.EXECUTE_PLAN_PROMPT_SELF_LLM)
        # Execute the verification questions, either using the search tool or the llm itself
        for question in verification_questions_list:
            if self.use_search_tool:
                search_result = self.search_for_verification_question(question)
                execution_prompt_value = execution_prompt_search_tool.format_prompt(**{"search_result": search_result, "verification_question": question})
            else:
                execution_prompt_value = execution_prompt_self_llm.format_prompt(**{"verification_question": question})
            verification_answer_llm_result = self.llm.generate_prompt([execution_prompt_value], callbacks=run_manager.get_child() if run_manager else None)
            verification_answer_str = verification_answer_llm_result.generations[0][0].text
            verification_answers_list.append(verification_answer_str)
        # Create verification question and answer pairs
        for question, answer in itertools.zip_longest(verification_questions_list, verification_answers_list):
            question_answer_pair += "Question: {} Answer: {}\n".format(question, answer)
        if run_manager:
            run_manager.on_text("Executed all verification questions")
        return {self.output_key: question_answer_pair}

    async def _acall(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[AsyncCallbackManagerForChainRun] = None,
    ) -> Dict[str, str]:
        # Async variant: mimics LLMChain by formatting the prompt and calling the llm once.
        prompt_value = self.prompt.format_prompt(**inputs)
        # Whenever you call a language model, or another chain, pass a callback
        # manager to it so the inner run is tracked by any callbacks registered
        # on the outer run; `run_manager.get_child()` provides one.
        response = await self.llm.agenerate_prompt(
            [prompt_value], callbacks=run_manager.get_child() if run_manager else None
        )
        # Calling methods on `run_manager` logs text for this run and triggers
        # any callbacks registered for that event.
        if run_manager:
            await run_manager.on_text("Executed verification questions asynchronously")
        return {self.output_key: response.generations[0][0].text}

    @property
    def _chain_type(self) -> str:
        return "execute_verification_chain"
\ No newline at end of file
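A sketch of running `ExecuteVerificationChain` on its own (an illustration, not part of the commit): it assumes the same VertexAI setup as the rest of the repo, and with `use_search_tool=False` it answers via the llm instead of DuckDuckGo; the questions below are illustrative.

from langchain.chat_models import ChatVertexAI
from langchain.prompts import PromptTemplate
from execute_verification_chain import ExecuteVerificationChain
import prompts

llm = ChatVertexAI(model_name="chat-bison@001", temperature=0.1)
chain = ExecuteVerificationChain(llm=llm,
                                 prompt=PromptTemplate.from_template(prompts.EXECUTE_PLAN_PROMPT),
                                 use_search_tool=False)  # self-verify with the llm, no web search
output = chain({"verification_questions": "1. Was Matt Damon born in Boston?\n2. Was Chris Evans born in Boston?"})
print(output["verification_answers"])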
main.py 0 → 100644
import argparse

from dotenv import load_dotenv
from langchain.chat_models import ChatVertexAI

from route_chain import RouteCOVEChain

load_dotenv("/workspace/.env")

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Chain of Verification (CoVe) parser.')
    parser.add_argument('--question',
                        type=str,
                        required=True,
                        help='The original question the user wants to ask')
    parser.add_argument('--llm-name',
                        type=str,
                        required=False,
                        default="chat-bison@001",
                        help='The vertexai llm name')
    parser.add_argument('--temperature',
                        type=float,
                        required=False,
                        default=0.1,
                        help='The temperature of the llm')
    parser.add_argument('--max-tokens',
                        type=int,
                        required=False,
                        default=1024,
                        help='The max output tokens of the llm')
    # argparse's `type=bool` treats any non-empty string (including "False") as
    # True, so use BooleanOptionalAction (Python 3.9+) for a real on/off flag.
    parser.add_argument('--show-intermediate-steps',
                        action=argparse.BooleanOptionalAction,
                        default=True,
                        help='Whether to print the intermediate steps of the chains')
    args = parser.parse_args()
    original_query = args.question
    # The LangChain VertexAI wrappers expose the token limit as `max_output_tokens`.
    chain_llm = ChatVertexAI(model_name=args.llm_name,
                             temperature=args.temperature,
                             max_output_tokens=args.max_tokens)
    route_llm = ChatVertexAI(model_name="chat-bison@001",
                             temperature=0.1,
                             max_output_tokens=1024)
    router_cove_chain_instance = RouteCOVEChain(original_query, route_llm, chain_llm, args.show_intermediate_steps)
    router_cove_chain = router_cove_chain_instance()
    router_cove_chain_result = router_cove_chain({"original_question": original_query})

    if args.show_intermediate_steps:
        # Print intermediate steps
        print("\n" + 60 * "#" + " Intermediate Steps " + 60 * "#" + "\n")
        # Not every chain produces every key (e.g. only WIKI_CHAIN emits a
        # verification question template), so use .get() with a fallback.
        intermediate_steps = [
            "Question: " + router_cove_chain_result["original_question"],
            "Baseline response: " + router_cove_chain_result.get("baseline_response", "N/A"),
            "Verification questions: " + router_cove_chain_result.get("verification_questions", "N/A"),
            "Verification answers: " + router_cove_chain_result.get("verification_answers", "N/A"),
        ]
        for step in intermediate_steps:
            print(step)
        print("\n" + 60 * "#" + " End of Intermediate Steps " + 60 * "#" + "\n")
    # Print the final answer
    print("Final Answer: {}".format(router_cove_chain_result["final_answer"]))
prompts.py 0 → 100644
######################################################################## BASELINE PROMPTS ########################################################################
BASELINE_PROMPT_WIKI = """Answer the below question, which asks for a list of entities (names, places, locations etc). The output should be a numbered list that only contains the relevant and concise entities as the answer. NO ADDITIONAL DETAILS.
Question: {original_question}
Answer:"""

BASELINE_PROMPT_MULTI = """Answer the below question correctly and concisely, without extra detail. Only answer what the question asks.
Question: {original_question}
Answer:"""

BASELINE_PROMPT_LONG = """Answer the below question correctly.
Question: {original_question}
Answer:"""
################################################################### PLAN VERIFICATION PROMPTS ###################################################################
VERIFICATION_QUESTION_TEMPLATE_PROMPT_WIKI = """Your task is to create a verification question based on the below question provided.
Example Question: Who are some movie actors who were born in Boston?
Example Verification Question: Was [movie actor] born in [Boston]?
Explanation: In the above example, the verification question focused only on the ANSWER_ENTITY (name of the movie actor) and QUESTION_ENTITY (birth place).
Similarly, you need to focus on the ANSWER_ENTITY and QUESTION_ENTITY from the actual question and generate a verification question.
Actual Question: {original_question}
Final Verification Question:"""
VERIFICATION_QUESTION_PROMPT_WIKI = """Your task is to create a series of verification questions based on the below question, the verification question template and the baseline response.
Example Question: Who are some movie actors who were born in Boston?
Example Verification Question Template: Was [movie actor] born in Boston?
Example Baseline Response: 1. Matt Damon - Famous for his roles in films like "Good Will Hunting," "The Bourne Identity" series, and "The Martian," Damon is an Academy Award-winning actor, screenwriter, and producer.
2. Chris Evans - Famous for his portrayal of Captain America in the Marvel Cinematic Universe, Evans has also appeared in movies like "Snowpiercer" and "Knives Out."
Example Verification Questions: 1. Was Matt Damon born in Boston?
2. Was Chris Evans born in Boston?
Explanation: In the above example, the verification questions focused only on the ANSWER_ENTITY (name of the movie actor) and QUESTION_ENTITY (birth place) based on the template, substituting entity values from the baseline response.
Similarly, you need to focus on the ANSWER_ENTITY and QUESTION_ENTITY from the actual question and substitute the entity values from the baseline response to generate verification questions.
Actual Question: {original_question}
Baseline Response: {baseline_response}
Verification Question Template: {verification_question_template}
Final Verification Questions:"""
VERIFICATION_QUESTION_PROMPT_MULTI = """Your task is to create verification questions based on the below original question and the baseline response. The verification questions are meant for verifying the factual accuracy of the baseline response.
Example Question: Who invented the first printing press and in what year?
Example Baseline Response: Johannes Gutenberg, 1450.
Example Verification Questions: 1. Did Johannes Gutenberg invent the first printing press?
2. Did Johannes Gutenberg invent the first printing press in the year 1450?
Explanation: The verification questions are highly aligned with both the actual question and the baseline response. The actual question comprises multiple independent questions, which in turn have multiple independent answers in the baseline response. Hence, the verification questions should also be independent for factual verification.
Actual Question: {original_question}
Baseline Response: {baseline_response}
Final Verification Questions:"""

VERIFICATION_QUESTION_PROMPT_LONG = """Your task is to create verification questions based on the below original question and the baseline response. The verification questions are meant for verifying the factual accuracy of the baseline response. The output should be a numbered list of verification questions.
Actual Question: {original_question}
Baseline Response: {baseline_response}
Final Verification Questions:"""
################################################################## EXECUTE VERIFICATION PROMPTS ##################################################################
EXECUTE_PLAN_PROMPT_SEARCH_TOOL = """Answer the following question correctly based on the provided context. The question could be tricky as well, so think step by step and answer it correctly.
Context: {search_result}
Question: {verification_question}
Answer:"""
EXECUTE_PLAN_PROMPT_SELF_LLM = """Answer the following question correctly.
Question: {verification_question}
Answer:"""
EXECUTE_PLAN_PROMPT = "{verification_questions}"
################################################################## FINAL REFINED PROMPTS ##################################################################
FINAL_REFINED_PROMPT = """Given the below `Original Query` and `Baseline Answer`, analyze the `Verification Questions & Answers` to produce the final refined answer.
Original Query: {original_question}
Baseline Answer: {baseline_response}
Verification Questions & Answer Pairs:
{verification_answers}
Final Refined Answer:"""
################################################################## ROUTER PROMPTS ##################################################################
ROUTER_CHAIN_PROMPT = """Please classify the below question into one of the following categories. The output should be a JSON as shown in the Examples.
Categories:
WIKI_CHAIN: Good for answering questions which ask for a list or set of entities as their answer.
MULTI_CHAIN: Good for answering questions which comprise multiple independent sub-questions, whose answers are derived from a series of multiple discontiguous spans in the text.
LONG_CHAIN: Good for answering questions whose answer is long.
Examples:
WIKI_CHAIN:
Question: Name some endemic orchids of Vietnam.
JSON Output: {{"category": "WIKI_CHAIN"}}
Question: Which scientists won the Nobel Prize in the year 1970?
JSON Output: {{"category": "WIKI_CHAIN"}}
Question: List some cricket players who are playing in the Indian cricket team.
JSON Output: {{"category": "WIKI_CHAIN"}}
MULTI_CHAIN:
Question: Who is known for developing the theory of relativity, and in which year was it introduced?
JSON Output: {{"category": "MULTI_CHAIN"}}
Question: Who is credited with inventing the telephone, and when did this invention take place?
JSON Output: {{"category": "MULTI_CHAIN"}}
Question: Who was the first person to orbit the Earth in space, and during which year did this historic event occur?
JSON Output: {{"category": "MULTI_CHAIN"}}
LONG_CHAIN:
Question: Write a few lines about Einstein.
JSON Output: {{"category": "LONG_CHAIN"}}
Question: Tell me in short about the first moon landing.
JSON Output: {{"category": "LONG_CHAIN"}}
Question: Write a short biography of Karl Marx.
JSON Output: {{"category": "LONG_CHAIN"}}
Actual Question: {}
Final JSON Output:"""
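ROUTER_CHAIN_PROMPT instructs the model to emit a single JSON object, which route_chain.py parses with `json.loads`. A minimal sketch of that contract (the literal below is a stand-in for a model response, not real output):

import json

model_response = '{"category": "WIKI_CHAIN"}'  # stand-in for the router llm's output
chain_dict = json.loads(model_response)
assert chain_dict["category"] in {"WIKI_CHAIN", "MULTI_CHAIN", "LONG_CHAIN"}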
route_chain.py 0 → 100644
import json

from langchain.chains import ConversationChain
from langchain.prompts import PromptTemplate
from langchain.schema import HumanMessage

from cove_chains import (
    WikiDataCategoryListCOVEChain,
    MultiSpanCOVEChain,
    LongFormCOVEChain
)
import prompts

class RouteCOVEChain(object):
    def __init__(self, question, llm, chain_llm, show_intermediate_steps):
        self.llm = llm
        self.question = question
        self.show_intermediate_steps = show_intermediate_steps

        wiki_data_category_list_cove_chain_instance = WikiDataCategoryListCOVEChain(chain_llm)
        wiki_data_category_list_cove_chain = wiki_data_category_list_cove_chain_instance()
        multi_span_cove_chain_instance = MultiSpanCOVEChain(chain_llm)
        multi_span_cove_chain = multi_span_cove_chain_instance()
        long_form_cove_chain_instance = LongFormCOVEChain(chain_llm)
        long_form_cove_chain = long_form_cove_chain_instance()

        self.destination_chains = {
            "WIKI_CHAIN": wiki_data_category_list_cove_chain,
            "MULTI_CHAIN": multi_span_cove_chain,
            "LONG_CHAIN": long_form_cove_chain
        }
        # ConversationChain expects its input under the "input" key by default,
        # while callers pass {"original_question": ...}; remap the input key
        # (with a matching prompt) so the fallback chain accepts the same inputs
        # as the CoVe chains. The prompt wording here is a minimal assumption.
        default_prompt = PromptTemplate(
            input_variables=["history", "original_question"],
            template="The following is a conversation between a human and an AI.\n{history}\nHuman: {original_question}\nAI:")
        self.default_chain = ConversationChain(llm=chain_llm,
                                               prompt=default_prompt,
                                               input_key="original_question",
                                               output_key="final_answer")

    def __call__(self):
        route_message = [HumanMessage(content=prompts.ROUTER_CHAIN_PROMPT.format(self.question))]
        response = self.llm(route_message)
        response_str = response.content
        try:
            chain_dict = json.loads(response_str)
            try:
                if self.show_intermediate_steps:
                    print("Chain selected: {}".format(chain_dict["category"]))
                return self.destination_chains[chain_dict["category"]]
            except KeyError:
                if self.show_intermediate_steps:
                    print("KeyError! Falling back to the default `ConversationChain`.")
                return self.default_chain
        except json.JSONDecodeError:
            if self.show_intermediate_steps:
                print("JSONDecodeError! Falling back to the default `ConversationChain`.")
            return self.default_chain
\ No newline at end of file
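A short end-to-end sketch of the router (an illustration, not part of the commit): it assumes the same VertexAI setup as main.py, and the question should route to MULTI_CHAIN.

from langchain.chat_models import ChatVertexAI
from route_chain import RouteCOVEChain

llm = ChatVertexAI(model_name="chat-bison@001", temperature=0.1)
question = "Who invented the first printing press and in what year?"
router = RouteCOVEChain(question, llm, llm, show_intermediate_steps=True)
cove_chain = router()  # picks a CoVe chain, or falls back to ConversationChain
print(cove_chain({"original_question": question})["final_answer"])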