thinkberg.com

Commit Diff

Commit:: d0d125e7ac0a5cc3e9924691160301dae740bcc0
From:: Matthias L. Jugel <leo@thinkberg.com>
Date:: Fri Jul 11 18:05:25 2025 UTC
Message:: modify backend and frontend to use mcp protocol
Actions:: Patch | Tree
commit - 830d8191936320c0ae569b373d61df7a307efae8
commit + d0d125e7ac0a5cc3e9924691160301dae740bcc0
blob - 28cc2e8d33bdf6da8ef2122058b7babb82a09beb
blob + 6ffc0ca8fa7694b4627f0701462b98f3b70b378d
--- rag_backend.py
+++ rag_backend.py
@@ -1,5 +1,7 @@
 import argparse
+import logging
 
+from chromadb import Settings
 from langchain_chroma import Chroma
 from langchain.prompts import ChatPromptTemplate
 from langchain_ollama import OllamaLLM
@@ -10,11 +12,14 @@ from configuration import embeddings, DB_PATH, PROMPT_
 class RagBackend:
     def __init__(self, db_path: str = None):
         self.db_path = db_path if db_path else DB_PATH
-        embedding_function = embeddings()
-        self.db = Chroma(persist_directory=self.db_path, embedding_function=embedding_function)
+        self.db = Chroma(
+            persist_directory=self.db_path,
+            embedding_function=embeddings(),
+            client_settings=Settings(anonymized_telemetry=False)
+        )
         self.model = OllamaLLM(model="llama3")
 
-    def query(self, query_text: str, history: str) -> list[list[str]]:
+    def query(self, query_text: str, history: str) -> (str, list[str]):
         # look up possible context from the index
         context_docs = self.db.similarity_search_with_score(query_text, k=5)
 
@@ -28,7 +33,11 @@ class RagBackend:
 
 
 if __name__ == "__main__":
-    # Create CLI.
+    logging.basicConfig(
+        level=logging.INFO,
+        format='%(asctime)s %(name)s %(message)s'
+    )
+
     parser = argparse.ArgumentParser()
     parser.add_argument("--db", default=DB_PATH, help="path to the database")
     parser.add_argument("query_text", type=str, help="The query text.")
blob - bf9f1c65f4f8d39eed574808bd702857278f938d
blob + 28da3f54b65e1e8b1cb46f9c296e7ae2e2e40e0b
--- rag_indexer.py
+++ rag_indexer.py
@@ -10,7 +10,7 @@ from langchain_community.document_loaders.pdf import P
 from langchain_community.document_loaders.text import TextLoader
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 
-from configuration import embeddings
+from configuration import embeddings, DB_PATH
 
 
 class RagIndexer:
@@ -72,7 +72,7 @@ class RagIndexer:
                 self._db.add_documents(batch_chunks, ids=batch_chunk_ids)
                 logging.info(f"new chunk batch {start_idx + 1} to {end_idx} added")
         else:
-            logging.warn("no new or updated chunks found")
+            logging.warning("no new or updated chunks found")
 
     def calculate_chunk_ids(self, chunks: list[Document]) -> list[Document]:
         # This will create IDs like "source.ext:6:2"
blob - fde7de1e6fc84adcd3fa7758a0ffcb7516fa09ea
blob + 54f332f4b361ede4044c6272cdf3d3da1c1fa3d1
--- rag_interface.py
+++ rag_interface.py
@@ -1,3 +1,5 @@
+import logging
+
 from flask import Flask, request, jsonify, render_template
 
 from rag_backend import RagBackend
@@ -5,22 +7,35 @@ from rag_backend import RagBackend
 app = Flask(__name__)
 rag = RagBackend()
 
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s %(name)s %(message)s'
+)
+
+
 @app.route('/')
 def home():
     return render_template("page.html")
 
 
-@app.route('/chat', methods=['POST'])
-def chat():
-    user_message = request.json.get('message', '')
-    history_message = request.json.get('history', '')
-    llm_response, references = rag.query(user_message, history_message)
-    return jsonify({
-        'reply': {
-            'text': llm_response,
-            'references': references
-        }
-    })
+# MCP endpoint
+@app.route('/mcp', methods=['POST'])
+def handle_mcp():
+    data = request.get_json()
+    method = data.get('method')
+    params = data.get('params', {})
+    # Dispatch to the correct tool
+    if method == "llm_chat":
+        result, refs = rag.query(params.get('query', ''), params.get('history', ''))
+        return jsonify({
+            "jsonrpc": "2.0",
+            "result": {
+                'text': result,
+                'ref': refs},
+            "id": data.get("id")
+        })
+    # Add more tool handlers as needed
+    return jsonify({"jsonrpc": "2.0", "error": {"code": -32601, "message": "Method not found"}, "id": data.get("id")})
 
 
 if __name__ == '__main__':
blob - 5ea01edd5c332347a36464da1aa3f7e359924abe
blob + c51160a44bcbdf3037f850b08e4a5a7d08efbc4b
--- requirements.txt
+++ requirements.txt
@@ -5,3 +5,4 @@ langchain-chroma
 flask
 chromadb
 pytest
+mcp[cli]
\ No newline at end of file
blob - 1cc031ac6eccb63d792a719f289a4ec552b2d5a4
blob + 9aa19b8971ac34dacc009c9b1ac133747ce7d5e0
--- templates/page.html
+++ templates/page.html
@@ -183,19 +183,35 @@
         input.disabled = isBusy;
     }
 
-    // Helper to construct file link from reference object
-    function referenceToLink(refObj) {
-        // refObj: { ref: "filename.pdf:12:34", title: "Title" }
-        const match = refObj.ref.match(/^(.+?):(\d+):(\d+)$/);
-        if (!match) return refObj.ref;
-        const filename = match[1];
-        const page = match[2];
-        const url = `/static/files/${encodeURIComponent(filename)}#page=${page}`;
-        // Show the title (bold), then the link
-        return `<a href="${url}" target="_blank" rel="noopener">${refObj.title}, p.${page}</a>`;
+    async function sendMCPMessage(query, historyText) {
+        const requestId = Date.now();
+        const requestPayload = {
+            jsonrpc: "2.0",
+            method: "llm_chat",
+            params: { query: query, history: historyText },
+            id: requestId
+        };
+
+        const response = await fetch('/mcp', {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify(requestPayload)
+        });
+
+        if (!response.ok) {
+            throw new Error(`HTTP error! status: ${response.status}`);
+        }
+
+        const data = await response.json();
+
+        if (data.error) {
+            throw new Error(data.error.message || 'Unknown MCP error');
+        }
+
+        return data.result;
     }
 
-    send.onclick = function() {
+    send.onclick = async function() {
         const text = input.value.trim();
         if (!text) return;
         appendMessage('user', text);
@@ -206,22 +222,16 @@
         history.forEach(entry => {
             context_prompt += `Question: ${entry.question}\nAnswer: ${entry.answer}\n`;
         });
-        fetch('/chat', {
-            method: 'POST',
-            headers: { 'Content-Type': 'application/json' },
-            body: JSON.stringify({ message: text, history: context_prompt })
-        })
-        .then(response => response.json())
-        .then(data => {
-            appendMessage('bot', data.reply.text);
-            // Save to history
-            history.push({ question: text, answer: data.reply.text });
-                    setBusy(false);
-        })
-        .catch(() => {
-            appendMessage('bot', "Sorry, there was an error.");
+
+        try {
+            const result = await sendMCPMessage(text, context_prompt);
+            appendMessage('bot', result.text);
+            history.push({ question: text, answer: result.text });
+        } catch (error) {
+            appendMessage('bot', `Error: ${error.message}`);
+        } finally {
             setBusy(false);
-        });
+        }
     };
 
     input.addEventListener("keyup", function(event) {