You can save on costs by caching your embeddings. CacheBackedEmbeddings hashes each input text (together with a namespace) and stores the resulting vector in a key-value store, so texts that have already been embedded are served from the cache instead of triggering a new API call.

from langchain_openai import AzureOpenAIEmbeddings
from langchain.embeddings import CacheBackedEmbeddings
from langchain.storage import LocalFileStore
from txtai import Embeddings
import numpy as np
from typing import List
from os import getenv
from dotenv import load_dotenv

load_dotenv()

Define your embeddings

openai_embeddings = AzureOpenAIEmbeddings(
    azure_deployment=getenv("EMBEDDINGS_NAME"),
    api_key=getenv("OPENAI_API_KEY"),
    azure_endpoint=getenv("AZURE_OPENAI_ENDPOINT"),
)

Define your caches: one store for document embeddings and one for query embeddings

docs_store = LocalFileStore("./docs_cache")
query_store = LocalFileStore("./query_cache")

cached_embedder = CacheBackedEmbeddings.from_bytes_store(
    openai_embeddings,
    document_embedding_cache=docs_store,
    query_embedding_cache=query_store,
    namespace=openai_embeddings.model,
)

docs_store is used when you call cached_embedder.embed_documents, and query_store is used when you call cached_embedder.embed_query. Note that the calls must go through cached_embedder; calling openai_embeddings directly bypasses the cache.
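
As a quick sanity check, here is a minimal sketch of how the cache behaves on repeated calls. The sample texts and the timing code are purely illustrative and not part of the setup above.

import time

docs = [
    "LangChain can cache embeddings in a local file store.",
    "Azure OpenAI bills per token embedded.",
]

# First call: vectors are computed via the Azure OpenAI API and written to ./docs_cache
start = time.perf_counter()
vectors = cached_embedder.embed_documents(docs)
print(f"first call: {time.perf_counter() - start:.2f}s, {len(vectors[0])} dimensions")

# Second call with the same texts: served from the local cache, no API request is made
start = time.perf_counter()
vectors_again = cached_embedder.embed_documents(docs)
print(f"second call: {time.perf_counter() - start:.2f}s (cache hit)")

# Queries are cached separately in ./query_cache
query_vector = cached_embedder.embed_query("How does embedding caching work?")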