tool search_codebase, read_local_file, list_directory

d1b6adb5 · Đặng Trần Nguyên Khang · b3281caa · d1b6adb5
Commit d1b6adb5 authored Apr 22, 2026 by Đặng Trần Nguyên Khang
Show whitespace changes
Inline Side-by-side

Showing with 196 additions and 0 deletions

memory_mcp_server_stdio.py memory_mcp_server_stdio.py +196 -0

No files found.
--- a/memory_mcp_server_stdio.py
+++ b/memory_mcp_server_stdio.py
 import os
+try:
+    import docx
+    HAS_DOCX = True
+except ImportError:
+    HAS_DOCX = False
 import pathlib
 import uuid
 from datetime import datetime
@@ -27,6 +32,23 @@ from qdrant_client.models import (
 from sentence_transformers import SentenceTransformer
 from gitlab import Gitlab

+# ==========================================
+# GLOBAL CONFIGURATION
+# Change the path here and every tool below picks it up
+# ==========================================
+# Base directory: default search/list location and the anchor for relative paths.
+PROJECT_ROOT = r"D:\Do an tot nghiep\Daovan"
+
+# Directory names pruned from traversal by search_codebase and list_directory.
+IGNORE_DIRS = {
+    '.git', 'venv', 'node_modules', '__pycache__', 
+    '.idea', '.vscode', '.sixth', '.vs',       # Hidden IDE/editor directories (.vscode, .sixth, ...)
+    'bin', 'obj', 'Migrations', 'packages'     # .NET build directories
+}
+
+# File extensions (compared lower-cased, with the leading dot) that
+# search_codebase skips as binary.
+IGNORE_EXTENSIONS = {
+    '.exe', '.dll', '.png', '.jpg', '.jpeg', '.gif', 
+    '.pdf', '.zip', '.tar', '.gz', '.pyc', '.pdb'
+}
+
 load_dotenv()

 # Config
@@ -578,5 +600,179 @@ def get_postgres_function_code(function_name: str) -> str:
        pg_conn.rollback()
        return f"Lỗi lấy code function: {str(e)}"

+@mcp.tool()
+def search_codebase(keyword: str, search_path: str = PROJECT_ROOT) -> str:
+    """
+    Scan the project's source files for a specific keyword.
+
+    Very useful for finding the file that handles an API route
+    (e.g. '/api/v1/bills') or the place where a function/variable is declared.
+
+    Args:
+        keyword: Case-sensitive substring looked up in every line.
+        search_path: Directory to search. An absolute path is used as-is; a
+            relative path is resolved against PROJECT_ROOT.
+
+    Returns:
+        A newline-separated list of matches (relative file path, line number
+        and the stripped line), capped at 20 entries, or a message explaining
+        why no result could be produced.
+    """
+    max_matches = 20      # Cap the number of results so the reply does not overflow the AI's tokens
+    max_line_chars = 300  # Clip very long (e.g. minified) lines for the same reason
+
+    # An empty keyword is a substring of every line and would only return noise.
+    if not keyword:
+        return "Lỗi: Từ khóa tìm kiếm không được để trống."
+
+    # Resolve relative paths against the project root (same rule as the other
+    # file tools) rather than the server's current working directory.
+    if not os.path.isabs(search_path):
+        search_path = os.path.join(PROJECT_ROOT, search_path)
+    search_path = os.path.normpath(search_path)
+
+    if not os.path.exists(search_path):
+        return f"Lỗi: Không tìm thấy thư mục {search_path}"
+    # os.walk silently yields nothing for a file, which would be reported as
+    # "keyword not found"; say what is actually wrong instead.
+    if not os.path.isdir(search_path):
+        return f"Lỗi: '{search_path}' không phải là thư mục."
+
+    results = []
+    for root, dirs, files in os.walk(search_path):
+        # Prune junk/library directories in place so os.walk does not descend
+        # into them; sorting makes it deterministic which matches fill the cap.
+        dirs[:] = sorted(d for d in dirs if d not in IGNORE_DIRS)
+
+        for file in sorted(files):
+            # Skip binary files
+            if os.path.splitext(file)[1].lower() in IGNORE_EXTENSIONS:
+                continue
+
+            file_path = os.path.join(root, file)
+            # Shorten the path so the output is easier for the AI to read
+            rel_path = os.path.relpath(file_path, search_path)
+            remaining = max_matches - len(results)
+
+            # Matches are buffered per file so that a file which turns out not
+            # to be valid UTF-8 part-way through contributes nothing.
+            file_hits = []
+            try:
+                # utf-8-sig drops a leading BOM if present; iterating the handle
+                # streams the file instead of loading every line into memory.
+                with open(file_path, 'r', encoding='utf-8-sig') as f:
+                    for line_num, line in enumerate(f, 1):
+                        if keyword not in line:
+                            continue
+                        clean_line = line.strip()
+                        if len(clean_line) > max_line_chars:
+                            clean_line = clean_line[:max_line_chars] + "..."
+                        file_hits.append(f"- File: `{rel_path}` (Dòng {line_num}): {clean_line}")
+                        if len(file_hits) >= remaining:
+                            break
+            except (UnicodeDecodeError, OSError):
+                # Best effort: skip files in an unexpected encoding or that
+                # cannot be opened/read (e.g. permission errors)
+                continue
+
+            results.extend(file_hits)
+
+            # Stop early once too many matches were found
+            if len(results) >= max_matches:
+                results.append(f"\n... (Đã đạt giới hạn {max_matches} kết quả. Hãy cung cấp từ khóa chi tiết hơn nếu chưa tìm thấy thông tin cần thiết).")
+                return "\n".join(results)
+
+    if not results:
+        return f"Không tìm thấy từ khóa '{keyword}' trong thư mục {search_path}"
+
+    return "\n".join(results)
+
+@mcp.tool()
+def read_local_file(file_path: str) -> str:
+    """
+    Read the entire content of a text, code, csv, markdown or docx file.
+
+    This tool MUST be used to read FRS.docx, architecture.md or to inspect
+    source code in detail.
+
+    Args:
+        file_path: An absolute path, or a path relative to the Project Root.
+
+    Returns:
+        The file content preceded by a header line naming the file, or an
+        error message (missing path, directory given, file larger than 500 KB,
+        python-docx not installed, undecodable content, OS error).
+    """
+    # NOTE(review): absolute paths and '..' segments are accepted unchanged, so
+    # this can read files outside PROJECT_ROOT - confirm that is intended.
+    if not os.path.isabs(file_path):
+        full_path = os.path.join(PROJECT_ROOT, file_path)
+    else:
+        full_path = file_path
+
+    if not os.path.exists(full_path):
+        return f"Lỗi: Không tìm thấy file tại đường dẫn '{full_path}'"
+
+    if not os.path.isfile(full_path):
+        return f"Lỗi: '{full_path}' là một thư mục, không phải là file. Hãy dùng tool list_directory."
+
+    # Limit relaxed to 500KB because Word files are usually heavier than text files
+    MAX_SIZE_BYTES = 500 * 1024 
+    try:
+        file_size = os.path.getsize(full_path)
+    except OSError as e:
+        # The file can vanish or become unreadable between the checks above and
+        # this call; report it like any other read failure instead of raising.
+        return f"Lỗi hệ thống khi đọc file: {str(e)}"
+    if file_size > MAX_SIZE_BYTES:
+        return f"Lỗi: File quá lớn ({file_size // 1024} KB). Vượt quá giới hạn an toàn."
+
+    # --- DEDICATED HANDLING FOR WORD (.DOCX) FILES ---
+    if full_path.lower().endswith('.docx'):
+        if not HAS_DOCX:
+            return "Lỗi: Server chưa được cài đặt thư viện python-docx. Hãy chạy 'pip install python-docx'."
+        try:
+            doc = docx.Document(full_path)
+            text_content = []
+            
+            # 1. Regular paragraphs (blank ones are dropped)
+            for para in doc.paragraphs:
+                if para.text.strip():
+                    text_content.append(para.text.strip())
+            
+            # 2. Table data - very important for the FRS document.
+            # NOTE(review): table rows are appended after all paragraphs, so
+            # their original position in the document is not preserved.
+            for table in doc.tables:
+                for row in table.rows:
+                    # Join the non-empty cells of a row, separated by " | "
+                    row_data = [cell.text.replace('\n', ' ').strip() for cell in row.cells if cell.text.strip()]
+                    if row_data:
+                        text_content.append(" | ".join(row_data))
+                        
+            content = "\n".join(text_content)
+            return f"--- NỘI DUNG TÀI LIỆU WORD: {full_path} ---\n\n{content}"
+            
+        except Exception as e:
+            return f"Lỗi khi trích xuất file Word: {str(e)}"
+
+    # --- HANDLING FOR ORDINARY TEXT FILES ---
+    try:
+        # utf-8-sig decodes plain UTF-8 exactly like utf-8 but also strips a
+        # leading byte-order mark, which would otherwise show up as a stray
+        # '\ufeff' at the start of the returned content.
+        with open(full_path, 'r', encoding='utf-8-sig') as f:
+            content = f.read()
+            return f"--- NỘI DUNG FILE: {full_path} ---\n\n{content}"
+    except UnicodeDecodeError:
+        return f"Lỗi: File '{full_path}' có định dạng mã hóa không được hỗ trợ (Binary/PDF/Exe...)."
+    except Exception as e:
+        return f"Lỗi hệ thống khi đọc file: {str(e)}"
+
+@mcp.tool()
+def list_directory(directory_path: str = PROJECT_ROOT) -> str:
+    """
+    Scan a path and return its directory tree.
+
+    Gives the Agent an overview of the project layout and helps locate files
+    such as FRS.docx or architecture.md.
+
+    Args:
+        directory_path: Directory to list. An absolute path is used as-is; a
+            relative path is joined onto PROJECT_ROOT.
+
+    Returns:
+        An indented tree (one entry per line) limited to 3 directory levels and
+        30 files per directory, with IGNORE_DIRS pruned, or an error message.
+    """
+    
+    # Path handling: a relative path is joined with the project root
+    if not os.path.isabs(directory_path):
+        full_path = os.path.join(PROJECT_ROOT, directory_path)
+    else:
+        full_path = directory_path
+    # Normalise away trailing/mixed separators (e.g. "" or "src/") so the path
+    # shown to the caller and handed to os.walk is in canonical form.
+    full_path = os.path.normpath(full_path)
+
+    if not os.path.exists(full_path):
+        return f"Lỗi: Không tìm thấy thư mục '{full_path}'"
+    if not os.path.isdir(full_path):
+        return f"Lỗi: '{full_path}' không phải là thư mục. Hãy dùng tool read_local_file nếu muốn đọc file này."
+    
+    tree_output = [f"📁 CẤU TRÚC THƯ MỤC CỦA: {full_path}\n"]
+    
+    # --- TOKEN-PROTECTION FUSES ---
+    MAX_DEPTH = 3              # Show at most 3 directory levels (depths 0, 1 and 2)
+    MAX_FILES_PER_DIR = 30     # Show at most 30 files per directory
+    
+    try:
+        for root, dirs, files in os.walk(full_path):
+            # Depth relative to the listing root. Derived from the relative
+            # path instead of counting separators in absolute paths, which goes
+            # wrong for trailing separators and filesystem roots.
+            rel_root = os.path.relpath(root, full_path)
+            if rel_root == os.curdir:
+                current_depth = 0
+            else:
+                current_depth = rel_root.count(os.path.sep) + 1
+            
+            # Fuse 2: skip junk directories (in place, so os.walk honours it);
+            # sorted so the listing is deterministic.
+            dirs[:] = sorted(d for d in dirs if d not in IGNORE_DIRS)
+            
+            # Fuse 1: children of this directory would sit at the cut-off depth
+            # and never be printed, so do not even scan them.
+            if current_depth + 1 >= MAX_DEPTH:
+                dirs.clear()
+            
+            # Indentation for readability
+            indent = "  " * current_depth
+            
+            # Print the current directory name (the root was already printed above)
+            if current_depth > 0:
+                folder_name = os.path.basename(root)
+                tree_output.append(f"{indent}📂 {folder_name}/")
+                indent += "  " # One extra level for the files inside
+            
+            # Fuse 3: limit the number of files printed
+            files.sort()
+            for file in files[:MAX_FILES_PER_DIR]:
+                tree_output.append(f"{indent}📄 {file}")
+            hidden_count = len(files) - MAX_FILES_PER_DIR
+            if hidden_count > 0:
+                tree_output.append(f"{indent}📄 ... ({hidden_count} files bị ẩn để tiết kiệm bộ nhớ)")
+                
+        return "\n".join(tree_output)
+    except Exception as e:
+        return f"Lỗi hệ thống khi quét thư mục: {str(e)}"
+
 if __name__ == "__main__":
    mcp.run()
\ No newline at end of file