from __future__ import annotations

import re
from abc import abstractmethod
from typing import Any, Dict, List, Optional, Sequence, Tuple
import numpy as np
from langchain_community.llms.openai import OpenAI
from langchain_core.callbacks import (
    CallbackManagerForChainRun,
)
from langchain_core.language_models import BaseLanguageModel
from langchain_core.outputs import Generation
from langchain_core.prompts import BasePromptTemplate
from langchain_core.pydantic_v1 import Field
from langchain_core.retrievers import BaseRetriever

from langchain.chains.base import Chain
from langchain.chains.flare.prompts import (
    PROMPT,
    QUESTION_GENERATOR_PROMPT,
    FinishedOutputParser,
)
from langchain.chains.llm import LLMChain


class _ResponseChain(LLMChain):
    """Base class for chains that generate responses."""

    prompt: BasePromptTemplate = PROMPT

    @classmethod
    def is_lc_serializable(cls) -> bool:
        return False

    @property
    def input_keys(self) -> List[str]:
        return self.prompt.input_variables

    def generate_tokens_and_log_probs(
        self,
        _input: Dict[str, Any],
        *,
        run_manager: Optional[CallbackManagerForChainRun] = None,
    ) -> Tuple[Sequence[str], Sequence[float]]:
        """Generate a response and return its tokens and token log probabilities."""
        llm_result = self.generate([_input], run_manager=run_manager)
        return self._extract_tokens_and_log_probs(llm_result.generations[0])

    @abstractmethod
    def _extract_tokens_and_log_probs(
        self, generations: List[Generation]
    ) -> Tuple[Sequence[str], Sequence[float]]:
        """Extract tokens and log probs from response."""


class _OpenAIResponseChain(_ResponseChain):
    """Chain that generates responses from user input and context."""

    llm: OpenAI = Field(
        default_factory=lambda: OpenAI(
            max_tokens=32, model_kwargs={"logprobs": 1}, temperature=0
        )
    )

    def _extract_tokens_and_log_probs(
        self, generations: List[Generation]
    ) -> Tuple[Sequence[str], Sequence[float]]:
        tokens = []
        log_probs = []
        for gen in generations:
            if gen.generation_info is None:
                raise ValueError("Generation info is required to extract logprobs.")
            # The OpenAI completions API, when called with logprobs set, returns
            # parallel "tokens" and "token_logprobs" lists in generation_info.
            tokens.extend(gen.generation_info["logprobs"]["tokens"])
            log_probs.extend(gen.generation_info["logprobs"]["token_logprobs"])
        return tokens, log_probs


class QuestionGeneratorChain(LLMChain):
"""Chain that generates questions from uncertain spans."""
prompt: BasePromptTemplate = QUESTION_GENERATOR_PROMPT
"""Prompt template for the chain."""

    @classmethod
    def is_lc_serializable(cls) -> bool:
        return False

    @property
    def input_keys(self) -> List[str]:
        """Input keys for the chain."""
        return ["user_input", "context", "response"]


def _low_confidence_spans(
    tokens: Sequence[str],
    log_probs: Sequence[float],
    min_prob: float,
    min_token_gap: int,
    num_pad_tokens: int,
) -> List[str]:
    """Extract spans of consecutive low-confidence tokens from a generation."""
    # Indices whose probability falls below min_prob, skipping tokens that
    # contain no word characters (pure punctuation or whitespace).
    _low_idx = np.where(np.exp(log_probs) < min_prob)[0]
    low_idx = [i for i in _low_idx if re.search(r"\w", tokens[i])]
    if len(low_idx) == 0:
        return []
    # Pad each low-confidence index on the right by num_pad_tokens, merging
    # neighboring indices that are closer than min_token_gap into one span.
    spans = [[low_idx[0], low_idx[0] + num_pad_tokens + 1]]
    for i, idx in enumerate(low_idx[1:]):
        end = idx + num_pad_tokens + 1
        if idx - low_idx[i] < min_token_gap:
            spans[-1][1] = end
        else:
            spans.append([idx, end])
    return ["".join(tokens[start:end]) for start, end in spans]


class FlareChain(Chain):
    """Chain that combines a retriever, a question generator,
    and a response generator."""

    question_generator_chain: QuestionGeneratorChain
    """Chain that generates questions from uncertain spans."""
    response_chain: _ResponseChain = Field(default_factory=_OpenAIResponseChain)
    """Chain that generates responses from user input and context."""
    output_parser: FinishedOutputParser = Field(default_factory=FinishedOutputParser)
    """Parser that determines whether the chain is finished."""
    retriever: BaseRetriever
    """Retriever that retrieves relevant documents from a user input."""
    min_prob: float = 0.2
    """Minimum probability for a token to be considered low confidence."""
    min_token_gap: int = 5
    """Minimum number of tokens between two low confidence spans."""
    num_pad_tokens: int = 2
    """Number of tokens to pad around a low confidence span."""
    max_iter: int = 10
    """Maximum number of iterations."""
    start_with_retrieval: bool = True
    """Whether to start with retrieval."""

    @property
    def input_keys(self) -> List[str]:
        """Input keys for the chain."""
        return ["user_input"]

    @property
    def output_keys(self) -> List[str]:
        """Output keys for the chain."""
        return ["response"]

    def _do_generation(
        self,
        questions: List[str],
        user_input: str,
        response: str,
        _run_manager: CallbackManagerForChainRun,
    ) -> Tuple[str, bool]:
        """Retrieve documents for the questions, then regenerate the response."""
        callbacks = _run_manager.get_child()
        docs = []
        for question in questions:
            docs.extend(self.retriever.get_relevant_documents(question))
        context = "\n\n".join(d.page_content for d in docs)
        result = self.response_chain.predict(
            user_input=user_input,
            context=context,
            response=response,
            callbacks=callbacks,
        )
        marginal, finished = self.output_parser.parse(result)
        return marginal, finished

    def _do_retrieval(
        self,
        low_confidence_spans: List[str],
        _run_manager: CallbackManagerForChainRun,
        user_input: str,
        response: str,
        initial_response: str,
    ) -> Tuple[str, bool]:
        """Generate a question for each uncertain span, then retrieve and regenerate."""
        question_gen_inputs = [
            {
                "user_input": user_input,
                "current_response": initial_response,
                "uncertain_span": span,
            }
            for span in low_confidence_spans
        ]
        callbacks = _run_manager.get_child()
        question_gen_outputs = self.question_generator_chain.apply(
            question_gen_inputs, callbacks=callbacks
        )
        questions = [
            output[self.question_generator_chain.output_keys[0]]
            for output in question_gen_outputs
        ]
        _run_manager.on_text(
            f"Generated Questions: {questions}", color="yellow", end="\n"
        )
        return self._do_generation(questions, user_input, response, _run_manager)

    def _call(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[CallbackManagerForChainRun] = None,
    ) -> Dict[str, Any]:
        _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
        user_input = inputs[self.input_keys[0]]
        response = ""
        for i in range(self.max_iter):
            _run_manager.on_text(
                f"Current Response: {response}", color="blue", end="\n"
            )
            # Tentatively continue the response and collect per-token log probs.
            _input = {"user_input": user_input, "context": "", "response": response}
            tokens, log_probs = self.response_chain.generate_tokens_and_log_probs(
                _input, run_manager=_run_manager
            )
            low_confidence_spans = _low_confidence_spans(
                tokens,
                log_probs,
                self.min_prob,
                self.min_token_gap,
                self.num_pad_tokens,
            )
            initial_response = response.strip() + " " + "".join(tokens)
            # If every token was confident, accept the continuation and check
            # whether the model signaled that it is finished.
            if not low_confidence_spans:
                response = initial_response
                final_response, finished = self.output_parser.parse(response)
                if finished:
                    return {self.output_keys[0]: final_response}
                continue
            # Otherwise, turn the uncertain spans into questions, retrieve
            # supporting documents, and regenerate the continuation with them.
            marginal, finished = self._do_retrieval(
                low_confidence_spans,
                _run_manager,
                user_input,
                response,
                initial_response,
            )
            response = response.strip() + " " + marginal
            if finished:
                break
        return {self.output_keys[0]: response}

    @classmethod
    def from_llm(
        cls, llm: BaseLanguageModel, max_generation_len: int = 32, **kwargs: Any
    ) -> FlareChain:
        """Create a FlareChain from a language model.

        Args:
            llm: Language model to use for question generation.
            max_generation_len: Maximum length of the generated response.
            **kwargs: Additional arguments to pass to the constructor.

        Returns:
            A FlareChain with the given language model.
        """
        question_gen_chain = QuestionGeneratorChain(llm=llm)
        # The response chain needs token logprobs, so it is pinned to the OpenAI
        # completions API here; `llm` is used only for question generation.
        response_llm = OpenAI(
            max_tokens=max_generation_len, model_kwargs={"logprobs": 1}, temperature=0
        )
        response_chain = _OpenAIResponseChain(llm=response_llm)
        return cls(
            question_generator_chain=question_gen_chain,
            response_chain=response_chain,
            **kwargs,
        )
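

# A minimal usage sketch (illustration only, not part of the module). It
# assumes an OpenAI API key in the environment and a `retriever` built
# elsewhere, e.g. from a vector store:
#
#     from langchain_community.llms.openai import OpenAI
#
#     flare = FlareChain.from_llm(
#         OpenAI(temperature=0),
#         retriever=retriever,  # any BaseRetriever instance
#         max_generation_len=164,
#         min_prob=0.3,
#     )
#     print(flare.run("Explain how FLARE decides when to retrieve."))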