You can save on costs by caching your embeddings, so each text is only sent to the embeddings API once and repeated requests are served from a local cache.
```python
from langchain_openai import AzureOpenAIEmbeddings
from langchain.embeddings import CacheBackedEmbeddings
from langchain.storage import LocalFileStore
from txtai import Embeddings
import numpy as np
from typing import List
from os import getenv
from dotenv import load_dotenv

load_dotenv()
```
Define your embeddings:

```python
openai_embeddings = AzureOpenAIEmbeddings(
    azure_deployment=getenv("EMBEDDINGS_NAME"),
    api_key=getenv("OPENAI_API_KEY"),
    azure_endpoint=getenv("AZURE_OPENAI_ENDPOINT"),
)
```
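The three values above are read from the environment that `load_dotenv()` populated. As a quick sanity check (a minimal sketch, assuming `EMBEDDINGS_NAME`, `OPENAI_API_KEY` and `AZURE_OPENAI_ENDPOINT` are set in your `.env` file), you can embed a single string with the uncached client:

```python
# Uncached call straight to Azure OpenAI, just to confirm the deployment responds
vector = openai_embeddings.embed_query("hello world")
print(len(vector))  # e.g. 1536 for a text-embedding-ada-002 deployment
```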
Define your cache:

```python
docs_store = LocalFileStore("./docs_cache")
query_store = LocalFileStore("./query_cache")

cached_embedder = CacheBackedEmbeddings.from_bytes_store(
    openai_embeddings,
    document_embedding_cache=docs_store,
    query_embedding_cache=query_store,
    namespace=openai_embeddings.model,
)
```
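With the cache in place, `cached_embedder` is used exactly like a regular embeddings object. A minimal usage sketch (the texts are placeholders): the first `embed_documents` call hits Azure OpenAI and persists the vectors under `./docs_cache`, and repeating it with the same texts is served from disk instead of the API.

```python
texts = ["LangChain makes caching easy", "Embeddings calls can get expensive"]

# First call: sent to Azure OpenAI, vectors written to ./docs_cache
doc_vectors = cached_embedder.embed_documents(texts)

# Same texts again: read back from the local cache, no API request
doc_vectors_again = cached_embedder.embed_documents(texts)

# Queries are cached separately, under ./query_cache
query_vector = cached_embedder.embed_query("How do I cache embeddings?")
```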
`docs_store` is used when you call `cached_embedder.embed_documents`, and `query_store` is used when you call `cached_embedder.embed_query`.
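If you want to confirm what has been written, both stores expose the byte-store interface, so you can list the persisted keys (a small sketch, assuming the calls above have already run):

```python
# Keys are prefixed with the namespace (here, the underlying model name)
print(list(docs_store.yield_keys()))
print(list(query_store.yield_keys()))
```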