commit 0762a220875890223346c1e2d17c867524ec8680
from: Matthias L. Jugel
date: Sat Jul 12 22:30:16 2025 UTC

easier configuration, better responses

commit - bbb301914b711755897a6000f8f68aea62bb8a2e
commit + 0762a220875890223346c1e2d17c867524ec8680
blob - 5d850b59c2c32db1e9c1152b34ca49ae97d17a48
blob + 0f2d9245df28f973652682991fb7e648086a2e72
--- .gitignore
+++ .gitignore
@@ -4,5 +4,5 @@ __pycache__
 .idea
 venv
 backup
-chroma
-static/files/*
\ No newline at end of file
+chroma/*
+static/files/*
blob - /dev/null
blob + 2a78adbcc6f879f78cc1ecdc6121d01d77a45982 (mode 644)
--- /dev/null
+++ CHANGELOG.md
@@ -0,0 +1,27 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+## [Unreleased]
+
+### Added
+- Environment variable support for configuration
+- Structured citation system in responses
+- Message-based prompt template system
+- New Ollama-related configuration variables
+- Enhanced CSS styling for better user experience
+
+### Changed
+- Updated context formatting to include document metadata
+- Simplified query response structure
+- Improved file path handling in citations
+- Enhanced UI responsiveness
+- Streamlined response handling in interface
+
+### Removed
+- Separate references field from response structure
+- Reset method from RagBackend class
+
+### Fixed
+- Path specifications in .gitignore
+- CSS formatting and styling issues
\ No newline at end of file
blob - 16a934cd6277ab0d51f850ea3e82d6dc805a8120
blob + 27a1fb414f93866c7a12fe643e52e7efe1a0d287
--- configuration.py
+++ configuration.py
@@ -1,18 +1,45 @@
+import os
+
 from langchain_ollama import OllamaEmbeddings
 
 # default paths
-DB_PATH: str = "chroma"
-FILE_PATH: str = "static/files"
+DB_PATH: str = os.getenv("RAG_DB_PATH", "chroma")
+FILE_PATH: str = os.getenv("RAG_STATIC_FILES", "static/files")
+# Ollama address
+OLLAMA_URL = os.getenv('RAG_OLLAMA_SERVER_URL', "http://localhost:11434")
+OLLAMA_MODEL = os.getenv('RAG_OLLAMA_MODEL', "llama3")
+OLLAMA_EMBEDDINGS_MODEL = os.getenv('RAG_OLLAMA_EMBEDDINGS_MODEL', "nomic-embed-text")
 
-# prompt template
-PROMPT_TEMPLATE: str = """
-Do not include any introductory or closing remarks.
-Use Markdown for all formatting (e.g., bold, italics, code blocks, lists, links).
-If there is not definitive answer or the the question is unclear, ask questions to narrow down on the answer.
-Answer the question (section QUESTION) based only on the following context (section CONTEXT) and the history of our conversation
-(section HISTORY:
+# prompt templates
+SYSTEM_PROMPT: str = r"""
+* Do not include introductory or closing remarks.
+* You are acting as a sparring partner for a roleplaying game master.
+* Answer the question using only the contents of the CONTEXT and HISTORY sections. Do not mention the sections.
+* Use Markdown for all formatting (e.g., bold, italics, code blocks, lists, links).
+* If the question is unclear, ask clarifying questions.
+IMPORTANT:
+Each document in the CONTEXT section starts with `DOCUMENT: ` followed by the document ID.
+It is followed by an empty line and the document text.
+An example of a document is:
+```
+---
+DOCUMENT: some-file.pdf:1
+
+Some text of the document
+```
+
+List the document IDs from the CONTEXT section that were used in the answer at the end.
+Never include the same document ID twice.
+An example of the sources section is:
+```
+### Sources:
+* some-file.pdf:1
+```
+
+"""
+HUMAN_TEMPLATE: str = r"""
 
 # CONTEXT
 
 {context}
 
@@ -20,11 +47,15 @@ Answer the question (section QUESTION) based only on t
 {history}
 
 # QUESTION
-You are acting as a sparing partner for a roleplaying game master.
-Answer the question based on the above context and history: {question}
+Answer the question based on the above context and history:
+{question}
 """
 
 
 def embeddings():
-    embeddings = OllamaEmbeddings(model="nomic-embed-text")
+    embeddings = OllamaEmbeddings(
+        base_url=OLLAMA_URL,
+        model=OLLAMA_EMBEDDINGS_MODEL,
+        # temperature=0.0,
+    )
     return embeddings
blob - e6c9ee3b06423ff23974eeb7819fe41f4a89d705
blob + 41c204b908758b6918f49614f651c5a3c53f5e78
--- rag_backend.py
+++ rag_backend.py
@@ -7,34 +7,44 @@ from langchain_chroma import Chroma
 from langchain.prompts import ChatPromptTemplate
 from langchain_community.document_loaders import TextLoader, PyPDFDirectoryLoader
 from langchain_core.documents import Document
+from langchain_core.messages import SystemMessage
+from langchain_core.prompts import HumanMessagePromptTemplate
 from langchain_ollama import OllamaLLM
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 
-from configuration import embeddings, DB_PATH, PROMPT_TEMPLATE
+from configuration import embeddings, DB_PATH, HUMAN_TEMPLATE, OLLAMA_MODEL, OLLAMA_URL, SYSTEM_PROMPT
 
 
 class RagBackend:
-    def __init__(self, db_path: str = None):
-        self.db_path = db_path if db_path else DB_PATH
+    def __init__(self, db_path: str = DB_PATH, ollama_url: str = OLLAMA_URL, model=OLLAMA_MODEL):
         self.db = Chroma(
-            persist_directory=self.db_path,
+            persist_directory=db_path,
             embedding_function=embeddings(),
             client_settings=Settings(anonymized_telemetry=False)
         )
-        self.model = OllamaLLM(model="llama3")
+        self.model = OllamaLLM(base_url=ollama_url, model=model)
 
     def query(self, query_text: str, history: str) -> (str, list[str]):
-        # look up possible context from the index
        context_docs = self.db.similarity_search_with_score(query_text, k=5)
-        context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in context_docs])
-        prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
+        # Format context from filtered documents
+        context_text = "\n\n---\n".join([
+            f"DOCUMENT: {':'.join(doc.metadata.get('id').split(':')[:2])}\n\n{doc.page_content}" for
+            doc, _score in context_docs])
+
+        prompt_template = ChatPromptTemplate.from_messages([
+            SystemMessage(content=SYSTEM_PROMPT),
+            HumanMessagePromptTemplate.from_template(HUMAN_TEMPLATE)])
         prompt = prompt_template.format(context=context_text, history=history, question=query_text)
+        # print("================")
+        # print(prompt)
+        # print("================")
         response_text = self.model.invoke(prompt)
+        # print("----------------")
+        # print(response_text)
+        # print("----------------")
+
+        return response_text
 
-        sources = [doc.metadata.get("id", None) for doc, _score in context_docs]
-        return [response_text, sources]
-
     @staticmethod
     def load_pdf_documents(path: str) -> list[Document]:
         return PyPDFDirectoryLoader(Path(path)).load()
@@ -100,7 +110,7 @@ class RagBackend:
         current_chunk_index = 0
 
         for chunk in chunks:
-            source = chunk.metadata.get("source")
+            source = Path(chunk.metadata.get("source")).name
             page = chunk.metadata.get("page")
             current_page_id = f"{source}:{page}"
 
@@ -119,10 +129,7 @@
         return chunks
 
-    def reset(self) -> None:
-        self.db.reset_collection()
-
 
 
 if __name__ == "__main__":
     logging.basicConfig(
         level=logging.INFO,
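For reference, a minimal usage sketch of the reworked backend (not part of the commit): it assumes an already populated Chroma index and a reachable Ollama server, with defaults coming from configuration.py above; the query string is a made-up example. Note that `query()` now returns only the Markdown answer, with cited document IDs embedded under a `### Sources:` heading instead of a separate references field:

```python
# Usage sketch, not part of the commit: exercises the new RagBackend API.
from rag_backend import RagBackend

backend = RagBackend()  # defaults: RAG_DB_PATH, RAG_OLLAMA_SERVER_URL, RAG_OLLAMA_MODEL
answer = backend.query("Describe the tavern scene in chapter 2.", history="")
print(answer)  # Markdown text ending with a "### Sources:" list of document IDs
```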
blob - fc8369a5e6dd63413cfa96ed393d27f6ba8dda21
blob + 1df8c9a7c238359be87915a3bece49b191f1f4a7
--- rag_indexer.py
+++ rag_indexer.py
@@ -1,5 +1,6 @@
 import argparse
 import logging
+import shutil
 import sys
 
 from configuration import DB_PATH
@@ -22,16 +23,20 @@ if __name__ == "__main__":
         parser.print_help()
         sys.exit(1)
 
-    indexer = RagBackend(args.db)
     if args.reset:
-        logging.info("deleting RAG indexer collection")
-        indexer.reset()
-        logging.info("indexer collection deleted")
+        try:
+            shutil.rmtree(DB_PATH)
+            logging.info("database deleted")
+        except FileNotFoundError:
+            pass
 
+    indexer = RagBackend(args.db)
     for source in args.sources:
         logging.info(f"searching {source}")
         pdf_docs = indexer.load_pdf_documents(source)
+        pdf_docs = indexer.split_documents(pdf_docs)
         indexer.add_to_index(pdf_docs)
         text_docs = indexer.load_text_documents(source)
+        text_docs = indexer.split_documents(text_docs)
         indexer.add_to_index(text_docs)
         logging.info(f"added {len(text_docs)} text documents to index")
blob - 084a7fa2fb50bb73d3c1cc670e39ce79417e01c4
blob + 6488fa41e5750ac622af34ae8b0c9c30d553a31d
--- rag_interface.py
+++ rag_interface.py
@@ -26,12 +26,11 @@ def handle_mcp():
     params = data.get('params', {})
     # Dispatch to the correct tool
     if method == "llm_chat":
-        result, refs = rag.query(params.get('query', ''), params.get('history', ''))
+        result = rag.query(params.get('query', ''), params.get('history', ''))
         return jsonify({
             "jsonrpc": "2.0",
             "result": {
-                'text': result,
-                'ref': refs
+                'text': result
             },
             "id": data.get("id")
         })
blob - c51160a44bcbdf3037f850b08e4a5a7d08efbc4b
blob + a668a2e62eb42faa91582421e5e2abae6bed247b
--- requirements.txt
+++ requirements.txt
@@ -1,8 +1,8 @@
 pypdf
+langchain
 langchain-community
 langchain-ollama
 langchain-chroma
 flask
 chromadb
-pytest
-mcp[cli]
\ No newline at end of file
+pytest
\ No newline at end of file
blob - 8a95ebf26866fb6afc2dc76c1f1a36e26976fe26
blob + 88329261c0ecff8190c540617f5e4ab7ed601a0c
--- templates/page.html
+++ templates/page.html
@@ -14,6 +14,7 @@
             background: #f4f7fa;
             box-sizing: border-box;
         }
+
         #container {
             height: 100vh;
             width: 100vw;
@@ -21,12 +22,14 @@
             flex-direction: column;
             background: #fff;
         }
+
         #chat-container {
             display: flex;
             flex-direction: column;
             height: 100vh;
             width: 100vw;
         }
+
         #header {
             background: #4a90e2;
             color: #fff;
@@ -34,6 +37,7 @@
             font-size: 1.3em;
             letter-spacing: 1px;
         }
+
         #chat {
             flex: 1 1 auto;
             padding: 32px 24px;
@@ -42,38 +46,46 @@
             flex-direction: column;
             gap: 10px;
         }
+
         .msg {
             display: flex;
             margin-bottom: 8px;
         }
+
         .msg.user {
             justify-content: flex-end;
         }
+
         .msg.bot {
             justify-content: flex-start;
         }
+
         .bubble {
             padding: 12px 16px;
             border-radius: 18px;
             max-width: 70%;
             word-wrap: break-word;
         }
+
         .bubble.user {
             background: #e1f5fe;
             color: #222;
             border-bottom-right-radius: 4px;
         }
+
         .bubble.bot {
             background: #f1f0f0;
             color: #222;
             border-bottom-left-radius: 4px;
         }
+
         #input-area {
             display: flex;
             border-top: 1px solid #eee;
             padding: 16px 24px;
             background: #fafbfc;
         }
+
         #input {
             flex: 1;
             padding: 10px;
@@ -85,6 +97,7 @@
             line-height: 1.2;
             box-sizing: border-box;
         }
+
         #send {
             margin-left: 10px;
             padding: 10px 20px;
@@ -95,21 +108,25 @@
             font-size: 1em;
             cursor: pointer;
         }
+
         #send:disabled {
             background: #a0c8f0;
             cursor: not-allowed;
         }
+
         #spinner {
             display: none;
             margin-left: 10px;
             align-self: center;
         }
+
         .lds-ring {
             display: inline-block;
             position: relative;
             width: 24px;
             height: 24px;
         }
+
         .lds-ring div {
             box-sizing: border-box;
             display: block;
@@ -122,21 +139,38 @@
             animation: lds-ring 1.2s linear infinite;
             border-color: #4a90e2 transparent transparent transparent;
         }
 
-        .lds-ring div:nth-child(1) { animation-delay: -0.45s; }
-        .lds-ring div:nth-child(2) { animation-delay: -0.3s; }
-        .lds-ring div:nth-child(3) { animation-delay: -0.15s; }
+
+        .lds-ring div:nth-child(1) {
+            animation-delay: -0.45s;
+        }
+
+        .lds-ring div:nth-child(2) {
+            animation-delay: -0.3s;
+        }
+
+        .lds-ring div:nth-child(3) {
+            animation-delay: -0.15s;
+        }
+
         @keyframes lds-ring {
-            0% { transform: rotate(0deg); }
-            100% { transform: rotate(360deg); }
+            0% {
+                transform: rotate(0deg);
+            }
+            100% {
+                transform: rotate(360deg);
+            }
         }
+
         @media (max-width: 700px) {
             #container, #chat-container {
                 height: 100vh;
                 width: 100vw;
             }
+
             #chat {
                 padding: 16px 8px;
             }
+
             #input-area {
                 padding: 12px 8px;
             }
@@ -166,7 +200,7 @@
         const send = document.getElementById('send');
         const spinner = document.getElementById('spinner');
 
-        function appendMessage(sender, text) {
+        function appendMessage(sender, text, references = []) {
             const msgDiv = document.createElement('div');
             msgDiv.className = 'msg ' + sender;
             const bubble = document.createElement('div');
@@ -192,13 +226,13 @@
             const requestPayload = {
                 jsonrpc: "2.0",
                 method: "llm_chat",
-                params: { query: query, history: historyText },
+                params: {query: query, history: historyText},
                 id: requestId
             };
 
             const response = await fetch('/mcp', {
                 method: 'POST',
-                headers: { 'Content-Type': 'application/json' },
+                headers: {'Content-Type': 'application/json'},
                 body: JSON.stringify(requestPayload)
             });
 
@@ -215,7 +249,7 @@
             return data.result;
         }
 
-        send.onclick = async function() {
+        send.onclick = async function () {
             const text = input.value.trim();
             if (!text) return;
             appendMessage('user', text);
@@ -231,7 +265,7 @@
             try {
                 const result = await sendMCPMessage(text, context_prompt);
                 appendMessage('bot', result.text);
-                history.push({ question: text, answer: result.text });
+                history.push({question: text, answer: result.text});
             } catch (error) {
                 appendMessage('bot', `Error: ${error.message}`);
             } finally {
@@ -242,17 +276,17 @@
         // input.addEventListener("keyup", function(event) {
         //     if (event.key === "Enter" && !send.disabled) send.click();
         // });
 
-        input.addEventListener("keydown", function(event) {
+        input.addEventListener("keydown", function (event) {
             if (event.key === "Enter" && (event.metaKey || event.ctrlKey)) {
-                if(send.disabled) return;
+                if (send.disabled) return;
                 event.preventDefault();
                 send.click();
             }
         });
-        input.addEventListener('input', function() {
+        input.addEventListener('input', function () {
             this.style.height = 'auto'; // Reset height
             this.style.height = (this.scrollHeight) + 'px'; // Set to scroll height
         });
-
+
\ No newline at end of file
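Taken together, the commit moves all deployment knobs into environment variables. A configuration sketch follows (not part of the commit; the host name and database path are hypothetical examples). The RAG_* variables are read once when configuration.py is imported, so they must be set beforehand:

```python
# Configuration sketch, not part of the commit: the RAG_* variables are read
# at import time by configuration.py, so set them before importing the stack.
import os

os.environ["RAG_OLLAMA_SERVER_URL"] = "http://ollama.internal:11434"  # hypothetical host
os.environ["RAG_OLLAMA_MODEL"] = "llama3"
os.environ["RAG_OLLAMA_EMBEDDINGS_MODEL"] = "nomic-embed-text"
os.environ["RAG_DB_PATH"] = "/var/lib/rag/chroma"  # hypothetical path
os.environ["RAG_STATIC_FILES"] = "static/files"

from rag_backend import RagBackend  # now picks up the values set above

backend = RagBackend()
```

In a real deployment the same variables would be exported in the shell before starting rag_interface.py or rag_indexer.py.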