
Tuesday, 17 September 2024

RAG with Ollama, Mistral and LangChain

Another experiment, this time using this repository: https://github.com/CallumJMac/lessons

The reference folder is lessons/1. RAG/examples/pixegami/PDF_files_langchain/rag-tutorial-v2-main

The PDF files go in the data folder.

Then run populate_database.py; persistence is provided by ChromaDB.
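To build (or rebuild) the index, the script is launched directly; the --reset flag, defined in the code below, wipes the Chroma folder before re-ingesting:

python populate_database.py
python populate_database.py --reset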

import argparse
import os
import shutil
from langchain.document_loaders.pdf import PyPDFDirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.schema.document import Document
from get_embedding_function import get_embedding_function
from langchain.vectorstores.chroma import Chroma


CHROMA_PATH = "chroma"
DATA_PATH = "data"


def main():

    # Check if the database should be cleared (using the --reset flag).
    parser = argparse.ArgumentParser()
    parser.add_argument("--reset", action="store_true", help="Reset the database.")
    args = parser.parse_args()
    if args.reset:
        print("✨ Clearing Database")
        clear_database()

    # Create (or update) the data store.
    documents = load_documents()
    chunks = split_documents(documents)
    add_to_chroma(chunks)


def load_documents():
    document_loader = PyPDFDirectoryLoader(DATA_PATH)
    return document_loader.load()


def split_documents(documents: list[Document]):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=800,
        chunk_overlap=80,
        length_function=len,
        is_separator_regex=False,
    )
    return text_splitter.split_documents(documents)


def add_to_chroma(chunks: list[Document]):
    # Load the existing database.
    db = Chroma(
        persist_directory=CHROMA_PATH,
        embedding_function=get_embedding_function()
    )

    # Calculate Page IDs.
    chunks_with_ids = calculate_chunk_ids(chunks)

    # Add or Update the documents.
    existing_items = db.get(include=[])  # IDs are always included by default
    existing_ids = set(existing_items["ids"])
    print(f"Number of existing documents in DB: {len(existing_ids)}")

    # Only add documents that don't exist in the DB.
    new_chunks = []
    for chunk in chunks_with_ids:
        if chunk.metadata["id"] not in existing_ids:
            new_chunks.append(chunk)

    if len(new_chunks):
        print(f"👉 Adding new documents: {len(new_chunks)}")
        new_chunk_ids = [chunk.metadata["id"] for chunk in new_chunks]
        db.add_documents(new_chunks, ids=new_chunk_ids)
        db.persist()
    else:
        print("✅ No new documents to add")


def calculate_chunk_ids(chunks):

    # This will create IDs like "data/monopoly.pdf:6:2"
    # Page Source : Page Number : Chunk Index

    last_page_id = None
    current_chunk_index = 0

    for chunk in chunks:
        source = chunk.metadata.get("source")
        page = chunk.metadata.get("page")
        current_page_id = f"{source}:{page}"

        # If the page ID is the same as the last one, increment the index.
        if current_page_id == last_page_id:
            current_chunk_index += 1
        else:
            current_chunk_index = 0

        # Calculate the chunk ID.
        chunk_id = f"{current_page_id}:{current_chunk_index}"
        last_page_id = current_page_id

        # Add it to the page meta-data.
        chunk.metadata["id"] = chunk_id

    return chunks


def clear_database():
    if os.path.exists(CHROMA_PATH):
        shutil.rmtree(CHROMA_PATH)


if __name__ == "__main__":
    main()
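The script imports get_embedding_function from a helper module that is not shown here. A minimal sketch, assuming Ollama embeddings are used and an embedding model has already been pulled locally (the model name nomic-embed-text is only an example, not necessarily the one chosen by the repository):

# get_embedding_function.py -- minimal sketch, assumes a local Ollama server
# and an embedding model (e.g. "nomic-embed-text") already pulled
from langchain_community.embeddings.ollama import OllamaEmbeddings


def get_embedding_function():
    # The same embedding object must be used both when populating Chroma
    # and when querying it, otherwise the vectors will not match.
    return OllamaEmbeddings(model="nomic-embed-text")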

The query can then be run from the command line, for example:

python query_data.py "what's monopoly"


import argparse
from langchain.vectorstores.chroma import Chroma
from langchain.prompts import ChatPromptTemplate
from langchain_community.llms.ollama import Ollama

from get_embedding_function import get_embedding_function

CHROMA_PATH = "chroma"

PROMPT_TEMPLATE = """
Answer the question based only on the following context:

{context}

---

Answer the question based on the above context: {question}
"""


def main():
    # Create CLI.
    parser = argparse.ArgumentParser()
    parser.add_argument("query_text", type=str, help="The query text.")
    args = parser.parse_args()
    query_text = args.query_text
    query_rag(query_text)


def query_rag(query_text: str):
    # Prepare the DB.
    embedding_function = get_embedding_function()
    db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)

    # Search the DB.
    results = db.similarity_search_with_score(query_text, k=5)

    context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])
    prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    prompt = prompt_template.format(context=context_text, question=query_text)
    # print(prompt)

    model = Ollama(model="mistral")
    response_text = model.invoke(prompt)

    sources = [doc.metadata.get("id", None) for doc, _score in results]
    formatted_response = f"Response: {response_text}\nSources: {sources}"
    print(formatted_response)
    return response_text


if __name__ == "__main__":
    main()
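Since query_rag returns the response text, the same pipeline can also be called from another Python script instead of the command line; a small usage example (it assumes the Chroma index has already been built with populate_database.py):

# call the RAG pipeline programmatically
from query_data import query_rag

answer = query_rag("what's monopoly")
print(answer)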


Just to give an idea, this is the response:

Response:  Monopoly is a property trading game from Parker Brothers designed for ages 8 and up, suitable for 2 to 8 players. The gameboard is used along with tokens, houses, hotels, Chance and Community Chest cards, Title Deed cards, play money, and a Banker's tray. Players can choose to play by the classic rules or use the Speed Die for faster gameplay. In Monopoly, the objective is to become the wealthiest player by buying, renting, and selling properties.


Using Llama3:7b the response was:

A simple one!

According to the context, Monopoly is a "Property Trading Game" from Parker Brothers.
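Switching model only means changing the Ollama model name inside query_rag (assuming that model has already been pulled locally), for example:

# in query_rag(), use Llama3 instead of Mistral
model = Ollama(model="llama3")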

 





AnythingLLM

Starting from the previous post, I wanted to try a service that could be served over the Web. On YouTube many examples use Streamlit, but digging around I found a turnkey service that:

1) allows uploading PDFs via the web

2) ingests the documents to build the knowledge base

3) lets you interact with it via a web chat interface

The service is called AnythingLLM and it is distributed both as a Docker image and as a desktop application (I preferred the Docker version).

Warning: if you see strange errors (such as "Could not respond to message. fetch failed"), the problem may come from too little RAM. At first I was testing Gemma (I have 16 GB and Gemma needs 9, so I thought I had plenty of headroom), but the system only started working once I installed Mistral with Ollama.

 

export STORAGE_LOCATION=$HOME/anythingllm && \
mkdir -p $STORAGE_LOCATION && \
touch "$STORAGE_LOCATION/.env" && \
docker run -d -p 3001:3001 \
--cap-add SYS_ADMIN \
-v ${STORAGE_LOCATION}:/app/server/storage \
-v ${STORAGE_LOCATION}/.env:/app/server/.env \
-e STORAGE_DIR="/app/server/storage" \
--add-host=host.docker.internal:host-gateway \
mintplexlabs/anythingllm


Furthermore, on Linux the web interface can be reached at http://localhost:3001.

On first launch you have to configure the model to use (local, e.g. via Ollama, or remote). At this stage I had some trouble because AnythingLLM could not find the models already installed on the machine via pull. The solution was to use the IP http://172.17.0.1:11434 instead of localhost.
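A quick way to check whether Ollama is actually reachable at that address (and which models it exposes) is to call its REST API; a small sketch with the requests library, assuming the default Ollama port 11434:

# list the models exposed by the Ollama server on the Docker bridge address
import requests

resp = requests.get("http://172.17.0.1:11434/api/tags", timeout=5)
for model in resp.json().get("models", []):
    print(model["name"])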


 


Any conflicts with Ollama can be resolved from here.

Below is a screenshot of a "conversation" with Mistral.