Commit Diff


commit - bbb301914b711755897a6000f8f68aea62bb8a2e
commit + 0762a220875890223346c1e2d17c867524ec8680
blob - 5d850b59c2c32db1e9c1152b34ca49ae97d17a48
blob + 0f2d9245df28f973652682991fb7e648086a2e72
--- .gitignore
+++ .gitignore
@@ -4,5 +4,5 @@ __pycache__
 .idea
 venv
 backup
-chroma
-static/files/*
\ No newline at end of file
+chroma/*
+static/files/*
blob - /dev/null
blob + 2a78adbcc6f879f78cc1ecdc6121d01d77a45982 (mode 644)
--- /dev/null
+++ CHANGELOG.md
@@ -0,0 +1,27 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+## [Unreleased]
+
+### Added
+- Environment variable support for configuration
+- Structured citation system in responses
+- Message-based prompt template system
+- New Ollama-related configuration variables
+- Enhanced CSS styling for better user experience
+
+### Changed
+- Updated context formatting to include document metadata
+- Simplified query response structure
+- Improved file path handling in citations
+- Enhanced UI responsiveness
+- Streamlined response handling in interface
+
+### Removed
+- Separate references field from response structure
+- Reset method from RagBackend class
+
+### Fixed
+- Path specifications in .gitignore
+- CSS formatting and styling issues
\ No newline at end of file
blob - 16a934cd6277ab0d51f850ea3e82d6dc805a8120
blob + 27a1fb414f93866c7a12fe643e52e7efe1a0d287
--- configuration.py
+++ configuration.py
@@ -1,18 +1,45 @@
+import os
+
 from langchain_ollama import OllamaEmbeddings
 
 # default paths
-DB_PATH: str = "chroma"
-FILE_PATH: str = "static/files"
+DB_PATH: str = os.getenv("RAG_DB_PATH", "chroma")
+FILE_PATH: str = os.getenv("RAG_STATIC_FILES", "static/files")
 
+# Ollama address
+OLLAMA_URL = os.getenv('RAG_OLLAMA_SERVER_URL', "http://localhost:11434")
+OLLAMA_MODEL = os.getenv('RAG_OLLAMA_MODEL', "llama3")
+OLLAMA_EMBEDDINGS_MODEL = os.getenv('RAG_OLLAMA_EMBEDDINGS_MODEL', "nomic-embed-text")
 
-# prompt template
-PROMPT_TEMPLATE: str = """
-Do not include any introductory or closing remarks. 
-Use Markdown for all formatting (e.g., bold, italics, code blocks, lists, links).
-If there is not definitive answer or the the question is unclear, ask questions to narrow down on the answer.
-Answer the question (section QUESTION) based only on the following context (section CONTEXT) and the history of our conversation
-(section HISTORY:
+# prompt templates
+SYSTEM_PROMPT: str = r"""
+* Do not include introductory or closing remarks.
+* You are acting as a sparring partner for a roleplaying game master.
+* Answer the question using only the contents of the CONTEXT and HISTORY sections. Do not mention the sections.
+* Use Markdown for all formatting (e.g., bold, italics, code blocks, lists, links).
+* If the question is unclear, ask clarifying questions.
 
+IMPORTANT:
+Each document in the CONTEXT section starts with `DOCUMENT: ` followed by the document ID.
+It is followed by an empty line and the document text.
+An example of a document is:
+```
+---
+DOCUMENT: some-file.pdf:1
+
+Some text of the document
+```
+
+At the end of the answer, list the document IDs from the CONTEXT section that were used.
+Never include the same document ID twice.
+An example of the sources section is:
+```
+### Sources:
+* some-file.pdf:1
+```
+
+"""
+HUMAN_TEMPLATE: str = r"""
 # CONTEXT 
 {context}
 
@@ -20,11 +47,15 @@ Answer the question (section QUESTION) based only on t
 {history}
 
 # QUESTION
-You are acting as a sparing partner for a roleplaying game master.
-Answer the question based on the above context and history: {question}
+Answer the question based on the above context and history: 
+{question}
 """
 
 
 def embeddings():
-    embeddings = OllamaEmbeddings(model="nomic-embed-text")
+    embeddings = OllamaEmbeddings(
+        base_url=OLLAMA_URL,
+        model=OLLAMA_EMBEDDINGS_MODEL,
+        # temperature=0.0,
+    )
     return embeddings
blob - e6c9ee3b06423ff23974eeb7819fe41f4a89d705
blob + 41c204b908758b6918f49614f651c5a3c53f5e78
--- rag_backend.py
+++ rag_backend.py
@@ -7,34 +7,44 @@ from langchain_chroma import Chroma
 from langchain.prompts import ChatPromptTemplate
 from langchain_community.document_loaders import TextLoader, PyPDFDirectoryLoader
 from langchain_core.documents import Document
+from langchain_core.messages import SystemMessage
+from langchain_core.prompts import HumanMessagePromptTemplate
 from langchain_ollama import OllamaLLM
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 
-from configuration import embeddings, DB_PATH, PROMPT_TEMPLATE
+from configuration import embeddings, DB_PATH, HUMAN_TEMPLATE, OLLAMA_MODEL, OLLAMA_URL, SYSTEM_PROMPT
 
 
 class RagBackend:
-    def __init__(self, db_path: str = None):
-        self.db_path = db_path if db_path else DB_PATH
+    def __init__(self, db_path: str = DB_PATH, ollama_url: str = OLLAMA_URL, model: str = OLLAMA_MODEL):
         self.db = Chroma(
-            persist_directory=self.db_path,
+            persist_directory=db_path,
             embedding_function=embeddings(),
             client_settings=Settings(anonymized_telemetry=False)
         )
-        self.model = OllamaLLM(model="llama3")
+        self.model = OllamaLLM(base_url=ollama_url, model=model)
 
     def query(self, query_text: str, history: str) -> (str, list[str]):
-        # look up possible context from the index
         context_docs = self.db.similarity_search_with_score(query_text, k=5)
 
-        context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in context_docs])
-        prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
+        # Format the retrieved documents, prefixing each with its "DOCUMENT: <id>" header
+        context_text = "\n\n---\n".join([
+            f"DOCUMENT: {':'.join(doc.metadata.get('id').split(':')[:2])}\n\n{doc.page_content}" for
+            doc, _score in context_docs])
+
+        prompt_template = ChatPromptTemplate.from_messages([
+            SystemMessage(content=SYSTEM_PROMPT),
+            HumanMessagePromptTemplate.from_template(HUMAN_TEMPLATE)])
         prompt = prompt_template.format(context=context_text, history=history, question=query_text)
+        # print("================")
+        # print(prompt)
+        # print("================")
         response_text = self.model.invoke(prompt)
+        # print("----------------")
+        # print(response_text)
+        # print("----------------")
+        return response_text
 
-        sources = [doc.metadata.get("id", None) for doc, _score in context_docs]
-        return [response_text, sources]
-
     @staticmethod
     def load_pdf_documents(path: str) -> list[Document]:
         return PyPDFDirectoryLoader(Path(path)).load()
@@ -100,7 +110,7 @@ class RagBackend:
         current_chunk_index = 0
 
         for chunk in chunks:
-            source = chunk.metadata.get("source")
+            source = Path(chunk.metadata.get("source")).name
             page = chunk.metadata.get("page")
             current_page_id = f"{source}:{page}"
 
@@ -119,10 +129,7 @@ class RagBackend:
 
         return chunks
 
-    def reset(self) -> None:
-        self.db.reset_collection()
 
-
 if __name__ == "__main__":
     logging.basicConfig(
         level=logging.INFO,
blob - fc8369a5e6dd63413cfa96ed393d27f6ba8dda21
blob + 1df8c9a7c238359be87915a3bece49b191f1f4a7
--- rag_indexer.py
+++ rag_indexer.py
@@ -1,5 +1,6 @@
 import argparse
 import logging
+import shutil
 import sys
 
 from configuration import DB_PATH
@@ -22,16 +23,20 @@ if __name__ == "__main__":
         parser.print_help()
         sys.exit(1)
 
-    indexer = RagBackend(args.db)
     if args.reset:
-        logging.info("deleting RAG indexer collection")
-        indexer.reset()
-        logging.info("indexer collection deleted")
+        try:
+            shutil.rmtree(args.db)
+            logging.info("database deleted")
+        except FileNotFoundError:
+            pass
 
+    indexer = RagBackend(args.db)
     for source in args.sources:
         logging.info(f"searching {source}")
         pdf_docs = indexer.load_pdf_documents(source)
+        pdf_docs = indexer.split_documents(pdf_docs)
         indexer.add_to_index(pdf_docs)
         text_docs = indexer.load_text_documents(source)
+        text_docs = indexer.split_documents(text_docs)
         indexer.add_to_index(text_docs)
         logging.info(f"added {len(text_docs)} text documents to index")
blob - 084a7fa2fb50bb73d3c1cc670e39ce79417e01c4
blob + 6488fa41e5750ac622af34ae8b0c9c30d553a31d
--- rag_interface.py
+++ rag_interface.py
@@ -26,12 +26,11 @@ def handle_mcp():
     params = data.get('params', {})
     # Dispatch to the correct tool
     if method == "llm_chat":
-        result, refs = rag.query(params.get('query', ''), params.get('history', ''))
+        result = rag.query(params.get('query', ''), params.get('history', ''))
         return jsonify({
             "jsonrpc": "2.0",
             "result": {
-                'text': result,
-                'ref': refs
+                'text': result
             },
             "id": data.get("id")
         })
blob - c51160a44bcbdf3037f850b08e4a5a7d08efbc4b
blob + a668a2e62eb42faa91582421e5e2abae6bed247b
--- requirements.txt
+++ requirements.txt
@@ -1,8 +1,8 @@
 pypdf
+langchain
 langchain-community
 langchain-ollama
 langchain-chroma
 flask
 chromadb
-pytest
-mcp[cli]
\ No newline at end of file
+pytest
\ No newline at end of file
blob - 8a95ebf26866fb6afc2dc76c1f1a36e26976fe26
blob + 88329261c0ecff8190c540617f5e4ab7ed601a0c
--- templates/page.html
+++ templates/page.html
@@ -14,6 +14,7 @@
             background: #f4f7fa;
             box-sizing: border-box;
         }
+
         #container {
             height: 100vh;
             width: 100vw;
@@ -21,12 +22,14 @@
             flex-direction: column;
             background: #fff;
         }
+
         #chat-container {
             display: flex;
             flex-direction: column;
             height: 100vh;
             width: 100vw;
         }
+
         #header {
             background: #4a90e2;
             color: #fff;
@@ -34,6 +37,7 @@
             font-size: 1.3em;
             letter-spacing: 1px;
         }
+
         #chat {
             flex: 1 1 auto;
             padding: 32px 24px;
@@ -42,38 +46,46 @@
             flex-direction: column;
             gap: 10px;
         }
+
         .msg {
             display: flex;
             margin-bottom: 8px;
         }
+
         .msg.user {
             justify-content: flex-end;
         }
+
         .msg.bot {
             justify-content: flex-start;
         }
+
         .bubble {
             padding: 12px 16px;
             border-radius: 18px;
             max-width: 70%;
             word-wrap: break-word;
         }
+
         .bubble.user {
             background: #e1f5fe;
             color: #222;
             border-bottom-right-radius: 4px;
         }
+
         .bubble.bot {
             background: #f1f0f0;
             color: #222;
             border-bottom-left-radius: 4px;
         }
+
         #input-area {
             display: flex;
             border-top: 1px solid #eee;
             padding: 16px 24px;
             background: #fafbfc;
         }
+
         #input {
             flex: 1;
             padding: 10px;
@@ -85,6 +97,7 @@
             line-height: 1.2;
             box-sizing: border-box;
         }
+
         #send {
             margin-left: 10px;
             padding: 10px 20px;
@@ -95,21 +108,25 @@
             font-size: 1em;
             cursor: pointer;
         }
+
         #send:disabled {
             background: #a0c8f0;
             cursor: not-allowed;
         }
+
         #spinner {
             display: none;
             margin-left: 10px;
             align-self: center;
         }
+
         .lds-ring {
             display: inline-block;
             position: relative;
             width: 24px;
             height: 24px;
         }
+
         .lds-ring div {
             box-sizing: border-box;
             display: block;
@@ -122,21 +139,38 @@
             animation: lds-ring 1.2s linear infinite;
             border-color: #4a90e2 transparent transparent transparent;
         }
-        .lds-ring div:nth-child(1) { animation-delay: -0.45s; }
-        .lds-ring div:nth-child(2) { animation-delay: -0.3s; }
-        .lds-ring div:nth-child(3) { animation-delay: -0.15s; }
+
+        .lds-ring div:nth-child(1) {
+            animation-delay: -0.45s;
+        }
+
+        .lds-ring div:nth-child(2) {
+            animation-delay: -0.3s;
+        }
+
+        .lds-ring div:nth-child(3) {
+            animation-delay: -0.15s;
+        }
+
         @keyframes lds-ring {
-            0% { transform: rotate(0deg); }
-            100% { transform: rotate(360deg); }
+            0% {
+                transform: rotate(0deg);
+            }
+            100% {
+                transform: rotate(360deg);
+            }
         }
+
         @media (max-width: 700px) {
             #container, #chat-container {
                 height: 100vh;
                 width: 100vw;
             }
+
             #chat {
                 padding: 16px 8px;
             }
+
             #input-area {
                 padding: 12px 8px;
             }
@@ -166,7 +200,7 @@
     const send = document.getElementById('send');
     const spinner = document.getElementById('spinner');
 
-    function appendMessage(sender, text) {
+    function appendMessage(sender, text, references = []) {
         const msgDiv = document.createElement('div');
         msgDiv.className = 'msg ' + sender;
         const bubble = document.createElement('div');
@@ -192,13 +226,13 @@
         const requestPayload = {
             jsonrpc: "2.0",
             method: "llm_chat",
-            params: { query: query, history: historyText },
+            params: {query: query, history: historyText},
             id: requestId
         };
 
         const response = await fetch('/mcp', {
             method: 'POST',
-            headers: { 'Content-Type': 'application/json' },
+            headers: {'Content-Type': 'application/json'},
             body: JSON.stringify(requestPayload)
         });
 
@@ -215,7 +249,7 @@
         return data.result;
     }
 
-    send.onclick = async function() {
+    send.onclick = async function () {
         const text = input.value.trim();
         if (!text) return;
         appendMessage('user', text);
@@ -231,7 +265,7 @@
         try {
             const result = await sendMCPMessage(text, context_prompt);
             appendMessage('bot', result.text);
-            history.push({ question: text, answer: result.text });
+            history.push({question: text, answer: result.text});
         } catch (error) {
             appendMessage('bot', `Error: ${error.message}`);
         } finally {
@@ -242,17 +276,17 @@
     // input.addEventListener("keyup", function(event) {
     //     if (event.key === "Enter" && !send.disabled) send.click();
     // });
-    input.addEventListener("keydown", function(event) {
+    input.addEventListener("keydown", function (event) {
         if (event.key === "Enter" && (event.metaKey || event.ctrlKey)) {
-            if(send.disabled) return;
+            if (send.disabled) return;
             event.preventDefault();
             send.click();
         }
     });
-    input.addEventListener('input', function() {
+    input.addEventListener('input', function () {
         this.style.height = 'auto'; // Reset height
         this.style.height = (this.scrollHeight) + 'px'; // Set to scroll height
     });
 </script>
 </body>
-</html>
+</html>
\ No newline at end of file
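
A minimal usage sketch for the configuration introduced above (not part of the commit). It assumes the defaults from configuration.py; the query text is invented, and the RAG_* variables must be set before configuration.py is first imported, because its os.getenv lookups run at import time:

    import os

    # Values below are the defaults from configuration.py; override as needed.
    # They must be in the environment before configuration.py is imported.
    os.environ.setdefault("RAG_OLLAMA_SERVER_URL", "http://localhost:11434")
    os.environ.setdefault("RAG_OLLAMA_MODEL", "llama3")
    os.environ.setdefault("RAG_OLLAMA_EMBEDDINGS_MODEL", "nomic-embed-text")

    from rag_backend import RagBackend

    backend = RagBackend()  # uses RAG_DB_PATH or "chroma" for the Chroma index
    answer = backend.query("Who rules the northern keep?", history="")  # illustrative query
    print(answer)  # Markdown answer ending in a "### Sources:" list of document IDs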