Source code for langchain.indexes.vectorstore
from typing import Any, Dict, List, Optional, Type
from langchain_community.document_loaders.base import BaseLoader
from langchain_community.embeddings.openai import OpenAIEmbeddings
from langchain_community.llms.openai import OpenAI
from langchain_community.vectorstores.chroma import Chroma
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.language_models import BaseLanguageModel
from langchain_core.pydantic_v1 import BaseModel, Extra, Field
from langchain_core.vectorstores import VectorStore
from langchain_text_splitters import RecursiveCharacterTextSplitter, TextSplitter
from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain
from langchain.chains.retrieval_qa.base import RetrievalQA
def _get_default_text_splitter() -> TextSplitter:
return RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
[docs]class VectorStoreIndexWrapper(BaseModel):
"""Wrapper around a vectorstore for easy access."""
vectorstore: VectorStore
class Config:
"""Configuration for this pydantic object."""
extra = Extra.forbid
arbitrary_types_allowed = True
[docs] def query(
self,
question: str,
llm: Optional[BaseLanguageModel] = None,
retriever_kwargs: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> str:
"""Query the vectorstore."""
llm = llm or OpenAI(temperature=0)
retriever_kwargs = retriever_kwargs or {}
chain = RetrievalQA.from_chain_type(
llm, retriever=self.vectorstore.as_retriever(**retriever_kwargs), **kwargs
)
return chain.run(question)
[docs] def query_with_sources(
self,
question: str,
llm: Optional[BaseLanguageModel] = None,
retriever_kwargs: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> dict:
"""Query the vectorstore and get back sources."""
llm = llm or OpenAI(temperature=0)
retriever_kwargs = retriever_kwargs or {}
chain = RetrievalQAWithSourcesChain.from_chain_type(
llm, retriever=self.vectorstore.as_retriever(**retriever_kwargs), **kwargs
)
return chain({chain.question_key: question})
[docs]class VectorstoreIndexCreator(BaseModel):
"""Logic for creating indexes."""
vectorstore_cls: Type[VectorStore] = Chroma
embedding: Embeddings = Field(default_factory=OpenAIEmbeddings)
text_splitter: TextSplitter = Field(default_factory=_get_default_text_splitter)
vectorstore_kwargs: dict = Field(default_factory=dict)
class Config:
"""Configuration for this pydantic object."""
extra = Extra.forbid
arbitrary_types_allowed = True
[docs] def from_loaders(self, loaders: List[BaseLoader]) -> VectorStoreIndexWrapper:
"""Create a vectorstore index from loaders."""
docs = []
for loader in loaders:
docs.extend(loader.load())
return self.from_documents(docs)
[docs] def from_documents(self, documents: List[Document]) -> VectorStoreIndexWrapper:
"""Create a vectorstore index from documents."""
sub_docs = self.text_splitter.split_documents(documents)
vectorstore = self.vectorstore_cls.from_documents(
sub_docs, self.embedding, **self.vectorstore_kwargs
)
return VectorStoreIndexWrapper(vectorstore=vectorstore)