Documentation Index
Fetch the complete documentation index at: https://spacesail.mintlify.app/llms.txt
Use this file to discover all available pages before exploring further.
Code
import asyncio
from agno.knowledge.embedder.vllm import VLLMEmbedder
from agno.knowledge.knowledge import Knowledge
from agno.vectordb.pgvector import PgVector
def main():
    """Show VLLMEmbedder used directly and as the embedder behind Knowledge.

    Steps, in order:
      1. Embed one sentence with a locally configured VLLMEmbedder and print
         the first five values plus the vector length.
      2. Build a Knowledge instance backed by PgVector with a local-mode
         embedder.
      3. Build a second Knowledge instance whose embedder is configured with
         a ``base_url`` (remote mode). It is constructed for illustration only
         and is not used afterwards.
      4. Pass a sample PDF path to the local Knowledge instance via
         ``add_content_async``.
    """
    # Basic usage: configure the embedder first, then request one embedding.
    standalone_embedder = VLLMEmbedder(
        id="intfloat/e5-mistral-7b-instruct",
        dimensions=4096,
        enforce_eager=True,
        vllm_kwargs={
            "disable_sliding_window": True,
            "max_model_len": 4096,
        },
    )
    vector = standalone_embedder.get_embedding(
        "The quick brown fox jumps over the lazy dog."
    )

    # Report a short preview of the vector and its length.
    print(f"Embeddings: {vector[:5]}")
    print(f"Dimensions: {len(vector)}")

    # Local mode with Knowledge: embedder -> vector store -> knowledge base.
    local_embedder = VLLMEmbedder(
        id="intfloat/e5-mistral-7b-instruct",
        dimensions=4096,
        enforce_eager=True,
        vllm_kwargs={
            "disable_sliding_window": True,
            "max_model_len": 4096,
        },
    )
    local_store = PgVector(
        db_url="postgresql+psycopg://ai:ai@localhost:5532/ai",
        table_name="vllm_embeddings",
        embedder=local_embedder,
    )
    knowledge = Knowledge(vector_db=local_store, max_results=2)

    # Remote mode with Knowledge: the embedder is given a base_url instead of
    # local vLLM engine options. Kept as a reference configuration only.
    remote_embedder = VLLMEmbedder(
        id="intfloat/e5-mistral-7b-instruct",
        dimensions=4096,
        base_url="http://localhost:8000/v1",
        api_key="your-api-key",  # Optional
    )
    remote_store = PgVector(
        db_url="postgresql+psycopg://ai:ai@localhost:5532/ai",
        table_name="vllm_embeddings_remote",
        embedder=remote_embedder,
    )
    knowledge_remote = Knowledge(vector_db=remote_store, max_results=2)

    # Run the async ingestion of the sample PDF to completion.
    ingestion = knowledge.add_content_async(
        path="cookbook/knowledge/testing_resources/cv_1.pdf",
    )
    asyncio.run(ingestion)


if __name__ == "__main__":
    main()
Usage
Create a virtual environment
Open the Terminal and create a Python virtual environment:
python3 -m venv .venv
source .venv/bin/activate
Install libraries
pip install -U agno vllm openai sqlalchemy psycopg[binary] pgvector pypdf
Run PgVector
docker run -d \
-e POSTGRES_DB=ai \
-e POSTGRES_USER=ai \
-e POSTGRES_PASSWORD=ai \
-e PGDATA=/var/lib/postgresql/data/pgdata \
-v pgvolume:/var/lib/postgresql/data \
-p 5532:5432 \
--name pgvector \
agno/pgvector:16
Notes
- This example uses local mode where vLLM loads the model directly (no server needed)
- For remote mode, the code includes a `knowledge_remote` example that sets the `base_url` parameter
- GPU with ~14GB VRAM required for e5-mistral-7b-instruct model
- For CPU-only or lower-memory setups, use a smaller model such as `BAAI/bge-small-en-v1.5` (and update `dimensions` to match that model's embedding size)
- Models are automatically downloaded from HuggingFace on first use