├── README.md
├── extractFullReport.py
├── extract_embedding.py
├── main.py
├── main_functions.py
└── requirements.txt
/README.md:
--------------------------------------------------------------------------------
1 | # Automatic Documentation Generator for Python Projects
2 |
## Overview
4 |
5 | This project is an innovative automated documentation tool that uses static code analysis and artificial intelligence to generate comprehensive technical documents for Python projects.
6 |
## Main Features
8 |
9 | ### Advanced Code Analysis
10 | - Full scan of Python projects
11 | - Specific extraction of code structure
- Identification of classes, functions, imports and docstrings
13 | - Calculation of code complexity
14 |
15 | ### AI-powered Documentation Generation
16 | - Use of advanced language models (Ollama/LLaMA)
17 | - Automatic generation of:
18 | - Project overview
19 | - Summaries of individual files
20 | - Description of interactions between modules
21 |
22 | ### Multiple Output Formats
23 | - JSON documentation for programmatic processing
24 | - Markdown file for human reading
25 | - HTML page for viewing in the browser
26 |
27 | ## Technologies Used
28 |
29 | - **Code Analysis**: Python AST (Abstract Syntax Tree)
30 | - **AI**: Ollama with LLaMA model
31 | - **Processing**:
32 | - Colorama (colored logs)
33 | - tqdm (progress bars)
34 | - markdown (document conversion)
35 |
36 | ## Benefits
37 |
38 | 1. **Time Saving**: Automatic documentation in minutes
39 | 2. **Consistency**: Standardized documentation
40 | 3. **Completeness**: Deep code analysis
41 | 4. **Flexibility**: Adaptable to different project structures
42 |
43 | ## Use Cases
44 |
45 | - Academic project documentation
46 | - Developer onboarding
47 | - Legacy code maintenance
48 | - Software architecture analysis
49 |
50 | ## Next Steps
51 |
52 | - Support for more programming languages
53 | - Integration with CI/CD tools
54 | - Refinement of AI models
55 | - Advanced documentation customization
56 |
57 | ## Contribution
58 |
59 | Contributions are welcome! Please review the contribution guidelines before submitting pull requests.
60 |
61 | ## License
62 |
63 | MIT License
64 |
--------------------------------------------------------------------------------
/extractFullReport.py:
--------------------------------------------------------------------------------
1 | import os
2 | import ast
3 | import json
4 | import markdown
5 | import time
6 | from typing import List, Dict
7 | from openai import OpenAI
8 | import colorama
9 | from tqdm import tqdm
10 |
# Initialize colorama so ANSI colors work cross-platform; autoreset restores
# the default color after every print call.
colorama.init(autoreset=True)

# Ollama exposes an OpenAI-compatible HTTP API on localhost:11434; the
# api_key is required by the OpenAI client but ignored by Ollama itself.
client = OpenAI(
    base_url='http://localhost:11434/v1',
    api_key='ollama'
)
19 |
def log_info(message):
    """Print an informational [INFO] message to stdout in cyan."""
    text = f"[INFO] {message}"
    print(f"{colorama.Fore.CYAN}{text}{colorama.Fore.RESET}")
23 |
def log_warning(message):
    """Print a [WARN] message to stdout in yellow."""
    yellow, reset = colorama.Fore.YELLOW, colorama.Fore.RESET
    print(f"{yellow}[WARN] {message}{reset}")
27 |
def log_error(message):
    """Print an [ERROR] message to stdout in red."""
    print(colorama.Fore.RED + f"[ERROR] {message}" + colorama.Fore.RESET)
31 |
def log_success(message):
    """Print a [SUCCESS] message to stdout in green."""
    colored = f"{colorama.Fore.GREEN}[SUCCESS] {message}{colorama.Fore.RESET}"
    print(colored)
35 |
def analyze_file(file_path: str) -> Dict:
    """Analyze a single Python file via its AST.

    Extracts top-level classes (with their method names), functions (with
    argument names and a node-count "complexity" proxy), import statements,
    and module-level string expressions (docstrings).

    Args:
        file_path: Path of the .py file to parse.

    Returns:
        Metadata dict, or {} when the file cannot be read or parsed (the
        error is logged, never raised).
    """
    try:
        log_info(f"Analyzing file: {file_path}")
        start_time = time.time()

        with open(file_path, "r", encoding="utf-8") as f:
            code = f.read()
        tree = ast.parse(code)
        file_info = {
            "file": file_path,
            "classes": [],
            "functions": [],
            "imports": [],
            "docstrings": [],
            "complexity": 0
        }

        # Only top-level nodes are inspected (nested definitions are ignored).
        for node in ast.iter_child_nodes(tree):
            if isinstance(node, ast.ClassDef):
                file_info["classes"].append({
                    "name": node.name,
                    "methods": [method.name for method in node.body if isinstance(method, ast.FunctionDef)]
                })
            elif isinstance(node, ast.FunctionDef):
                file_info["functions"].append({
                    "name": node.name,
                    "args": [arg.arg for arg in node.args.args],
                    # Crude complexity proxy: total AST node count
                    "complexity": len(list(ast.walk(node)))
                })
            elif isinstance(node, (ast.Import, ast.ImportFrom)):
                file_info["imports"].append(ast.unparse(node))
            elif (isinstance(node, ast.Expr)
                  and isinstance(node.value, ast.Constant)
                  and isinstance(node.value.value, str)):
                # Bug fix: ast.Str / .s is deprecated since Python 3.8 and
                # removed in 3.12; ast.Constant is the supported replacement.
                file_info["docstrings"].append(node.value.value)

        end_time = time.time()
        log_success(f"File analysis completed in {end_time - start_time:.2f} seconds")
        return file_info
    except Exception as e:
        log_error(f"Error analyzing {file_path}: {e}")
        return {}
78 |
def collect_python_files(directory: str) -> List[str]:
    """Recursively collect the paths of all ``.py`` files under *directory*.

    Args:
        directory: Root directory to walk.

    Returns:
        List of absolute/relative paths (matching *directory*'s form).
    """
    log_info(f"Collecting Python files in: {directory}")
    # Single comprehension; the original kept a manual counter that merely
    # duplicated len(python_files).
    python_files = [
        os.path.join(root, name)
        for root, _, files in os.walk(directory)
        for name in files
        if name.endswith(".py")
    ]

    log_success(f"Found {len(python_files)} Python files")
    return python_files
92 |
def generate_documentation(analysis_results: List[Dict], model: str = "qwen2.5:14b-instruct-q4_K_M") -> Dict:
    """Generates documentation using LLM.

    Issues three kinds of chat-completion requests against the local Ollama
    server (via the module-level OpenAI-compatible ``client``): one project
    overview, one summary per analyzed file, and one module-interaction
    description.

    Args:
        analysis_results: Per-file dicts as produced by ``analyze_file``
            (each must contain 'file', 'classes' and 'functions').
        model: Ollama model name used for every request.

    Returns:
        Dict with keys 'project_overview', 'file_summaries' (mapping file
        path -> {'summary', 'details'}) and 'module_interactions'. Sections
        whose LLM call fails are logged and left empty; no exception
        propagates to the caller.
    """
    documentation = {
        "project_overview": "",
        "file_summaries": {},
        "module_interactions": ""
    }

    # Project overview analysis: one prompt listing every file's classes/functions
    log_info("Generating project overview")
    overview_prompt = "Analyze this project structure and provide a comprehensive overview:\n\n"
    for result in analysis_results:
        overview_prompt += f"File: {result['file']}\n"
        overview_prompt += f"Classes: {', '.join([cls['name'] for cls in result['classes']])}\n"
        overview_prompt += f"Functions: {', '.join([func['name'] for func in result['functions']])}\n\n"

    overview_prompt += "Describe the project's purpose, main components, and how they interact."

    try:
        overview_completion = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are an expert in machine learning project analysis."},
                {"role": "user", "content": overview_prompt}
            ]
        )
        documentation["project_overview"] = overview_completion.choices[0].message.content
        log_success("Project overview generated")
    except Exception as e:
        log_error(f"Error generating project overview: {e}")

    # Individual file summaries: one request per file; a failure skips that file only
    log_info("Generating file summaries")
    for result in tqdm(analysis_results, desc="Processing files"):
        try:
            file_summary_prompt = f"Analyze the file {result['file']} and explain its purpose and key components:\n"
            file_summary_prompt += f"Classes: {', '.join([cls['name'] for cls in result['classes']])}\n"
            file_summary_prompt += f"Functions: {', '.join([func['name'] for func in result['functions']])}\n"

            file_summary_completion = client.chat.completions.create(
                model=model,
                messages=[
                    {"role": "system", "content": "You are an expert in code analysis."},
                    {"role": "user", "content": file_summary_prompt}
                ]
            )

            documentation["file_summaries"][result['file']] = {
                "summary": file_summary_completion.choices[0].message.content,
                "details": result
            }
        except Exception as e:
            log_warning(f"Error generating summary for {result['file']}: {e}")

    # Module interactions
    # NOTE(review): this prompt carries no project data at all, so the model
    # can only answer generically — consider including the overview/summaries.
    log_info("Generating module interaction description")
    try:
        interaction_prompt = "Describe how the modules and components in this project interact with each other."
        interaction_completion = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are an expert in software architecture."},
                {"role": "user", "content": interaction_prompt}
            ]
        )
        documentation["module_interactions"] = interaction_completion.choices[0].message.content
        log_success("Module interaction description generated")
    except Exception as e:
        log_error(f"Error generating module interactions: {e}")

    return documentation
164 |
def save_documentation(documentation: Dict, output_dir: str = "project_docs"):
    """Save documentation to *output_dir* in three formats: JSON, Markdown, HTML.

    Args:
        documentation: Dict produced by ``generate_documentation`` with
            'project_overview', 'module_interactions' and 'file_summaries'.
        output_dir: Target directory (created if missing).

    Each format is written independently; a failure in one is logged and
    does not prevent the others.
    """
    log_info(f"Saving documentation to directory: {output_dir}")

    # Create output directory
    os.makedirs(output_dir, exist_ok=True)

    # Save JSON (ensure_ascii=False keeps accented text human-readable)
    json_path = os.path.join(output_dir, "project_documentation.json")
    try:
        with open(json_path, "w", encoding="utf-8") as f:
            json.dump(documentation, f, indent=2, ensure_ascii=False)
        log_success(f"JSON documentation saved to: {json_path}")
    except Exception as e:
        log_error(f"Error saving JSON: {e}")

    # Generate Markdown
    markdown_content = f"""# Project Documentation

## Project Overview
{documentation['project_overview']}

## Module Interactions
{documentation['module_interactions']}

## File Summaries
"""

    for file_path, file_info in documentation['file_summaries'].items():
        markdown_content += f"""
### {file_path}
{file_info['summary']}

#### Detailed Components
- **Classes**: {[cls['name'] for cls in file_info['details']['classes']]}
- **Functions**: {[func['name'] for func in file_info['details']['functions']]}
"""

    # Save Markdown
    md_path = os.path.join(output_dir, "project_documentation.md")
    try:
        with open(md_path, "w", encoding="utf-8") as f:
            f.write(markdown_content)
        log_success(f"Markdown documentation saved to: {md_path}")
    except Exception as e:
        log_error(f"Error saving Markdown: {e}")

    # Convert to HTML. Bug fix: the original f-string template had lost its
    # HTML tags and wrote an invalid page; wrap the converted body in a
    # minimal valid HTML document instead.
    try:
        html_content = markdown.markdown(markdown_content)
        html_path = os.path.join(output_dir, "project_documentation.html")
        with open(html_path, "w", encoding="utf-8") as f:
            f.write(f"""<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Project Documentation</title>
</head>
<body>
{html_content}
</body>
</html>
""")
        log_success(f"HTML documentation saved to: {html_path}")
    except Exception as e:
        log_error(f"Error saving HTML: {e}")
234 |
if __name__ == "__main__":
    # Script entry point: collect -> analyze -> generate -> save, with timing.
    start_total_time = time.time()
    log_info("Starting project analysis")

    # Project directory to analyze.
    # NOTE(review): hard-coded machine-specific path — consider taking it
    # from sys.argv or an environment variable.
    project_dir = "/home/marcos/projetos_automatizacao/ENTENDER_textgrad/textgrad"

    # File collection
    python_files = collect_python_files(project_dir)

    # File analysis (files that fail to parse return {} and are skipped)
    log_info("Starting detailed file analysis")
    results = []
    for file in tqdm(python_files, desc="Analyzing files"):
        file_result = analyze_file(file)
        if file_result:
            results.append(file_result)

    # Documentation generation via the local LLM
    log_info("Generating documentation with AI assistant")
    documentation = generate_documentation(results)

    # Saving documentation (defaults to ./project_docs)
    save_documentation(documentation)

    # Total execution time
    end_total_time = time.time()
    log_success(f"Analysis completed in {end_total_time - start_total_time:.2f} seconds")
    print("\nDocumentation generated successfully in 'project_docs' directory!")
--------------------------------------------------------------------------------
/extract_embedding.py:
--------------------------------------------------------------------------------
1 | import os
2 | import ollama
3 | import chromadb
4 |
class SemanticSearchChroma:
    """Semantic document search backed by a persistent ChromaDB collection.

    Embeddings are produced locally through Ollama ('mxbai-embed-large') and
    stored in ChromaDB together with the raw document text.
    """

    def __init__(self, collection_name='document_embeddings'):
        """Open the persistent ChromaDB store and get/create the collection."""
        self.client = chromadb.PersistentClient(path="./chroma_storage")
        self.collection = self.client.get_or_create_collection(name=collection_name)

    def add_documents(self, directory):
        """Embed every regular file in *directory* and add it to the collection."""
        for filename in os.listdir(directory):
            filepath = os.path.join(directory, filename)
            if not os.path.isfile(filepath):
                continue

            with open(filepath, 'r', encoding='utf-8') as file:
                content = file.read()

            # Ask Ollama for the embedding vector of the whole document
            embedding = ollama.embeddings(
                model='mxbai-embed-large',
                prompt=content
            )['embedding']

            # Store vector + raw text, keyed by the file name
            self.collection.add(
                embeddings=[embedding],
                documents=[content],
                ids=[filename]
            )

    def search(self, query, n_results=3):
        """Return the *n_results* stored documents most similar to *query*."""
        query_embedding = ollama.embeddings(
            model='mxbai-embed-large',
            prompt=query
        )['embedding']

        return self.collection.query(
            query_embeddings=[query_embedding],
            n_results=n_results
        )
52 |
def main():
    """Index the reports folder into ChromaDB and run one interactive query."""
    # NOTE(review): hard-coded machine-specific path — confirm/parameterize.
    base_dir = '/home/marcos/projetos_automatizacao/meu_primeiro_agent/_relatorios/'

    # Initialize the searcher and index every file in the directory
    searcher = SemanticSearchChroma()
    searcher.add_documents(base_dir)

    # Interactive search example (prompt text is user-facing, in Portuguese)
    query = input("Digite o assunto para busca: ")
    results = searcher.search(query)

    # Show the top matches, truncated to the first 500 characters each
    for i, doc in enumerate(results['documents'][0], 1):
        print(f"Resultado {i}:\n{doc[:500]}...\n")

if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import os
3 | from PyQt5.QtWidgets import (
4 | QApplication, QMainWindow, QWidget, QVBoxLayout, QHBoxLayout,
5 | QLabel, QLineEdit, QPushButton, QFileDialog, QCheckBox,
6 | QTextEdit, QSplitter, QTreeView, QMenu, QAction
7 | )
8 | from PyQt5.QtCore import Qt, QDir, pyqtSignal
9 | from PyQt5.QtGui import QStandardItemModel, QStandardItem
10 | from ollama import chat
11 | from ollama import ChatResponse
12 | from extract_embedding import SemanticSearchChroma
13 | from main_functions import (
14 | collect_python_files,
15 | generate_documentation,
16 | analyze_file,
17 | save_documentation,
18 | log_info,
19 | log_error
20 | )
21 |
# Module-level singleton used by the GUI to index generated reports.
# NOTE(review): instantiating here opens/creates the ChromaDB store as an
# import-time side effect.
searcher = SemanticSearchChroma()
23 |
class DocumentationApp(QMainWindow):
    """Main window of the automated project documentation tool.

    Lets the user pick a project directory, browse its Python files in a
    tree view, and generate whole-project documentation, per-file LLM
    reports, and embeddings (via the module-level ``searcher``).
    """

    def __init__(self):
        super().__init__()
        self.setWindowTitle("Automated Project Documentation")
        self.setGeometry(100, 100, 1200, 800)
        # Folder where individual reports were last written (used by
        # generate_embedding); None until reports have been generated.
        self.localizacao_da_pasta = None

        # Main central widget
        main_widget = QWidget()
        self.setCentralWidget(main_widget)
        main_layout = QHBoxLayout()
        main_widget.setLayout(main_layout)

        # Left sidebar
        sidebar = QWidget()
        sidebar_layout = QVBoxLayout()
        sidebar.setLayout(sidebar_layout)
        sidebar.setMaximumWidth(300)

        # Project directory selection
        dir_label = QLabel("Select Project Directory:")
        self.dir_input = QLineEdit()
        browse_button = QPushButton("Browse")
        browse_button.clicked.connect(self.select_directory)

        self.partial_report_checkbox = QCheckBox("Partial Report")

        # Partial report criteria (enabled only while the checkbox is ticked)
        self.partial_criteria_input = QLineEdit()
        self.partial_criteria_input.setPlaceholderText("Enter partial report criteria")
        self.partial_criteria_input.setEnabled(False)
        self.partial_report_checkbox.toggled.connect(
            self.partial_criteria_input.setEnabled
        )

        # Generate report button
        generate_button = QPushButton("Generate Documentation")
        generate_button.clicked.connect(self.generate_documentation)

        # Add widgets to sidebar
        sidebar_layout.addWidget(dir_label)
        sidebar_layout.addWidget(self.dir_input)
        sidebar_layout.addWidget(browse_button)
        sidebar_layout.addWidget(self.partial_report_checkbox)
        sidebar_layout.addWidget(self.partial_criteria_input)
        sidebar_layout.addWidget(generate_button)

        # Generate individual reports button
        generate_individual_button = QPushButton("Generate Individual Reports")
        generate_individual_button.clicked.connect(self.generate_individual_reports)
        sidebar_layout.addWidget(generate_individual_button)

        # Generate embeddings button (enabled after reports exist)
        self.generate_embedding_button = QPushButton("Gerar embeddings")
        self.generate_embedding_button.setEnabled(False)
        self.generate_embedding_button.clicked.connect(self.generate_embedding)
        sidebar_layout.addWidget(self.generate_embedding_button)
        sidebar_layout.addStretch(1)

        # Main content area
        content_area = QWidget()
        content_layout = QVBoxLayout()
        content_area.setLayout(content_layout)

        # Results display
        self.results_text = QTextEdit()
        self.results_text.setReadOnly(True)

        # File tree view. Bug fix: signals are connected ONCE here — the
        # original connected doubleClicked inside populate_file_tree, adding
        # a duplicate connection (and duplicate report generation) every
        # time the tree was repopulated.
        self.file_tree = QTreeView()
        self.file_tree.setContextMenuPolicy(Qt.CustomContextMenu)
        self.file_tree.customContextMenuRequested.connect(self.show_context_menu)
        self.file_tree.doubleClicked.connect(self.generate_individual_report)

        # Add widgets to content area
        content_layout.addWidget(QLabel("Documentation Results:"))
        content_layout.addWidget(self.results_text)
        content_layout.addWidget(self.file_tree)

        # Add components to main layout
        main_layout.addWidget(sidebar)
        main_layout.addWidget(content_area)

    def enable_new_button(self):
        """Enable the embeddings button once individual reports exist."""
        self.generate_embedding_button.setEnabled(True)

    def generate_embedding(self):
        """Index the generated-reports folder into the ChromaDB store."""
        if self.localizacao_da_pasta is not None:
            searcher.add_documents(self.localizacao_da_pasta)
        else:
            print("Localização da pasta não definida")

    def select_directory(self):
        """Open a directory-selection dialog and populate the file tree."""
        dir_path = QFileDialog.getExistingDirectory(
            self,
            "Select Project Directory",
            os.path.expanduser('~')
        )

        if dir_path:
            self.dir_input.setText(dir_path)
            self.populate_file_tree(dir_path)

    def populate_file_tree(self, directory):
        """Populate the tree view with the .py files found under *directory*."""
        model = QStandardItemModel()
        root_item = model.invisibleRootItem()

        try:
            log_info(f"Populating file tree for directory: {directory}")
            for root, dirs, files in os.walk(directory):
                # One item per directory; stores its full path, not selectable
                dir_item = QStandardItem(os.path.basename(root))
                dir_item.setData(root, Qt.UserRole + 1)
                dir_item.setSelectable(False)

                for file in files:
                    # Only Python files are listed
                    if file.endswith('.py'):
                        file_item = QStandardItem(file)
                        # Store the full path alongside the display name
                        full_file_path = os.path.join(root, file)
                        file_item.setData(full_file_path, Qt.UserRole + 1)
                        dir_item.appendRow(file_item)

                # Skip directories that contain no Python files
                if dir_item.rowCount() > 0:
                    root_item.appendRow(dir_item)

            self.file_tree.setModel(model)
            # doubleClicked is connected once in __init__; connecting here
            # would stack a duplicate slot on every repopulation.
        except Exception as e:
            log_error(f"Error populating file tree: {e}")

    def generate_individual_report(self, index):
        """Generate and display a report for the file at *index* in the tree."""
        selected_item = index.model().itemFromIndex(index)

        # Full path stored when the tree was populated; guard against a
        # missing payload (e.g. an item without stored data) before isfile().
        file_path = selected_item.data(Qt.UserRole + 1)

        if file_path and os.path.isfile(file_path):
            print(f"Arquivo selecionado: {file_path}")

            try:
                file_result = analyze_file(file_path)
                if file_result:
                    report = self.generate_file_report(file_path, file_result)
                    self.results_text.setText(report)
            except Exception as e:
                log_error(f"Erro ao gerar relatório para {file_path}: {e}")
                self.results_text.setText(f"Erro ao analisar arquivo: {e}")

    def generate_file_report(self, file, file_result):
        """Ask the LLM for an embedding-friendly description of *file*.

        Bug fix: the original prompt never included the analysis data, so
        the model had nothing but the file name to work with; the extracted
        classes / functions / imports are now appended to the prompt.
        """
        prompt = f"""
Extract the most relevant information from the file {file} and generate a concise and informative text describing its content.
The generated text should be optimized for semantic search using embeddings.

Desired output example:

Main topics: climate change, agriculture, environmental impact, food security, data analysis, statistical modeling.
Content: This scientific study investigates the effects of climate change on global agricultural production.
By analyzing historical data and future projections, the document demonstrates how extreme climate events,
such as droughts and floods, affect agricultural productivity and food availability.
The authors propose adaptation and mitigation measures to ensure food security in a global warming scenario.

Static analysis of the file:
Classes: {', '.join(cls['name'] for cls in file_result.get('classes', [])) or 'None'}
Functions: {', '.join(func['name'] for func in file_result.get('functions', [])) or 'None'}
Imports: {', '.join(file_result.get('imports', [])) or 'None'}
"""

        # Call the LLM
        response: ChatResponse = chat(model='qwen2.5:14b-instruct-q4_K_M', messages=[
            {
                'role': 'system',
                'content': 'You are an expert in code analysis.'
            },
            {
                'role': 'user',
                'content': prompt
            }
        ])

        # Return the report generated by the LLM
        return response.message.content

    def generate_documentation(self):
        """Run the full pipeline: collect -> analyze -> document -> display.

        Bug fix: the original passed the directory string straight to the
        imported generate_documentation(), which expects a list of per-file
        analysis dicts; the files must be collected and analyzed first.
        """
        project_dir = self.dir_input.text()
        if not project_dir:
            self.results_text.setText("Please select a project directory")
            return

        try:
            python_files = collect_python_files(project_dir)
            analysis_results = [
                result for result in (analyze_file(f) for f in python_files) if result
            ]
            # Resolves to the module-level function imported from
            # main_functions (this method only shadows it as an attribute).
            documentation = generate_documentation(analysis_results)
            self.display_documentation_results(documentation)
        except Exception as e:
            log_error(f"Error generating documentation: {e}")
            self.results_text.setText(f"An error occurred: {str(e)}")

    def generate_individual_reports(self):
        """Generate and save one LLM report per Python file in the project."""
        project_dir = self.dir_input.text()
        if not project_dir:
            self.results_text.setText("Please select a project directory")
            return

        try:
            # Collect Python files
            python_files = collect_python_files(project_dir)

            # Reports live in a dedicated subfolder of the project
            reports_dir = os.path.join(project_dir, "_relatorios")
            self.localizacao_da_pasta = reports_dir
            os.makedirs(reports_dir, exist_ok=True)

            # Generate individual reports for each file
            for file in python_files:
                file_result = analyze_file(file)
                if file_result:
                    report = self.generate_file_report(file, file_result)

                    # Explicit utf-8: reports may contain accented text and
                    # the platform default encoding is not guaranteed.
                    report_file = os.path.join(reports_dir, f"{os.path.basename(file)}.txt")
                    with open(report_file, "w", encoding="utf-8") as f:
                        f.write(report)

            self.results_text.setText("Individual reports generated successfully!")
            self.enable_new_button()
        except Exception as e:
            log_error(f"Error generating individual reports: {e}")
            self.results_text.setText(f"An error occurred: {str(e)}")

    def show_context_menu(self, pos):
        """Show the context menu for the selected file in the tree view."""
        selected_indexes = self.file_tree.selectedIndexes()
        if selected_indexes:
            index = selected_indexes[0]
            selected_file = index.data()
            context_menu = QMenu()
            generate_report_action = QAction(f"Generate Report for {selected_file}", self)
            # Bug fix: the original called generate_individual_report(None, None)
            # — wrong arity AND no usable index; pass the selected index instead.
            generate_report_action.triggered.connect(
                lambda: self.generate_individual_report(index)
            )
            context_menu.addAction(generate_report_action)
            context_menu.exec_(self.file_tree.mapToGlobal(pos))

    def display_documentation_results(self, documentation):
        """Render the documentation dict into the read-only results pane."""
        results_text = f"""
Project Overview:
{documentation['project_overview']}
Module Interactions:
{documentation['module_interactions']}
File Summaries:
"""
        for file_path, file_info in documentation['file_summaries'].items():
            results_text += f"\n{file_path}:\n{file_info['summary']}\n"
        self.results_text.setText(results_text)
281 |
def main():
    """Create the Qt application, show the main window, run the event loop."""
    app = QApplication(sys.argv)
    main_window = DocumentationApp()
    main_window.show()
    sys.exit(app.exec_())

if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/main_functions.py:
--------------------------------------------------------------------------------
1 | import os
2 | import ast
3 | import json
4 | import time
5 | import numpy as np
6 | from typing import List, Dict
7 | import ollama
8 | from ollama import chat, ChatResponse
9 | import colorama
10 | from tqdm import tqdm
11 | import markdown
12 |
# Existing configuration kept: initialize colorama for colored terminal logs
# (autoreset restores the default color after every print).
colorama.init(autoreset=True)

# The embedding model ('mxbai-embed-large') is referenced by name in
# generate_embeddings / search_project_files below.
def log_info(message):
    """Emit a cyan [INFO] line on stdout."""
    print("".join((colorama.Fore.CYAN, f"[INFO] {message}", colorama.Fore.RESET)))
21 |
def log_warning(message):
    """Emit a yellow [WARN] line on stdout."""
    body = f"[WARN] {message}"
    print(f"{colorama.Fore.YELLOW}{body}{colorama.Fore.RESET}")
25 |
def log_error(message):
    """Emit a red [ERROR] line on stdout."""
    red, reset = colorama.Fore.RED, colorama.Fore.RESET
    print(f"{red}[ERROR] {message}{reset}")
29 |
def log_success(message):
    """Emit a green [SUCCESS] line on stdout."""
    print(colorama.Fore.GREEN + f"[SUCCESS] {message}" + colorama.Fore.RESET)
33 |
def analyze_file(file_path: str) -> Dict:
    """Analyze a single Python file with enhanced metadata.

    Parses the file with ``ast`` and extracts top-level classes (with their
    method names), functions (with argument names and a node-count
    "complexity" proxy), import statements, module-level string expressions
    (docstrings), plus a short plain-text summary.

    Args:
        file_path: Path of the .py file to analyze.

    Returns:
        Metadata dict, or {} when the file cannot be read or parsed (the
        error is logged, never raised).
    """
    try:
        log_info(f"Analyzing file: {file_path}")
        start_time = time.time()

        with open(file_path, "r", encoding="utf-8") as f:
            code = f.read()
        tree = ast.parse(code)
        file_info = {
            "file": file_path,
            "filename": os.path.basename(file_path),
            "classes": [],
            "functions": [],
            "imports": [],
            "docstrings": [],
            "complexity": 0,
            "summary": ""  # filled in below by generate_file_summary
        }

        # Only top-level nodes are inspected (nested definitions are ignored).
        for node in ast.iter_child_nodes(tree):
            if isinstance(node, ast.ClassDef):
                file_info["classes"].append({
                    "name": node.name,
                    "methods": [method.name for method in node.body if isinstance(method, ast.FunctionDef)]
                })
            elif isinstance(node, ast.FunctionDef):
                file_info["functions"].append({
                    "name": node.name,
                    "args": [arg.arg for arg in node.args.args],
                    # Crude complexity proxy: total AST node count
                    "complexity": len(list(ast.walk(node)))
                })
            elif isinstance(node, (ast.Import, ast.ImportFrom)):
                file_info["imports"].append(ast.unparse(node))
            elif (isinstance(node, ast.Expr)
                  and isinstance(node.value, ast.Constant)
                  and isinstance(node.value.value, str)):
                # Bug fix: ast.Str / .s is deprecated since Python 3.8 and
                # removed in 3.12; ast.Constant is the supported replacement.
                file_info["docstrings"].append(node.value.value)

        # Plain-text summary (LLM enhancement could replace this later)
        file_info["summary"] = generate_file_summary(file_info)

        end_time = time.time()
        log_success(f"File analysis completed in {end_time - start_time:.2f} seconds")
        return file_info
    except Exception as e:
        log_error(f"Error analyzing {file_path}: {e}")
        return {}
81 |
def generate_file_summary(file_info: Dict) -> str:
    """Build a short plain-text summary (filename, classes, functions).

    Empty lists are rendered as 'Nenhuma' (none); output ends with a newline.
    """
    class_names = [cls['name'] for cls in file_info['classes']] or ['Nenhuma']
    func_names = [func['name'] for func in file_info['functions']] or ['Nenhuma']
    lines = [
        f"Arquivo: {file_info['filename']}",
        f"Classes: {', '.join(class_names)}",
        f"Funções: {', '.join(func_names)}",
    ]
    return "\n".join(lines) + "\n"
88 |
def collect_python_files(directory: str) -> List[str]:
    """Recursively collect the paths of all ``.py`` files under *directory*.

    Args:
        directory: Root directory to walk.

    Returns:
        List of file paths (joined onto *directory*'s form).
    """
    log_info(f"Collecting Python files in: {directory}")
    # The original kept a separate counter that simply duplicated
    # len(python_files); a single comprehension is clearer.
    python_files = [
        os.path.join(root, name)
        for root, _, files in os.walk(directory)
        for name in files
        if name.endswith(".py")
    ]

    log_success(f"Found {len(python_files)} Python files")
    return python_files
102 |
def generate_embeddings(descriptions: Dict[str, str]) -> Dict[str, np.ndarray]:
    """Generate and persist one embedding vector per file description.

    Args:
        descriptions: Mapping of file path -> description text.

    Returns:
        Mapping of file path -> embedding vector (np.ndarray), matching the
        declared return annotation.

    Each vector is also saved to ``<file dir>/.project_docs/<name>.embedding.npy``
    so that search_project_files can load it later.
    """
    log_info("Generating semantic embeddings")
    embeddings: Dict[str, np.ndarray] = {}

    for file_path, description in descriptions.items():
        # Bug fix: ollama.embeddings returns a response dict; the original
        # stored the whole dict, which broke np.save/np.load and the cosine
        # similarity in search_project_files. Keep only the numeric vector.
        response = ollama.embeddings(
            model='mxbai-embed-large',
            prompt=description
        )
        vector = np.asarray(response['embedding'], dtype=np.float64)
        embeddings[file_path] = vector

        # Persist alongside the source file for later semantic search
        embedding_dir = os.path.join(os.path.dirname(file_path), '.project_docs')
        os.makedirs(embedding_dir, exist_ok=True)
        np.save(os.path.join(embedding_dir, f"{os.path.basename(file_path)}.embedding.npy"), vector)

    log_success(f"Generated {len(embeddings)} embeddings")
    return embeddings
122 |
def search_project_files(project_dir: str, query: str, top_k: int = 5) -> List[tuple]:
    """Semantic search across project files using saved embeddings.

    Embeds *query* via Ollama, then ranks every Python file that has a
    ``.project_docs/<name>.embedding.npy`` vector (written by
    generate_embeddings) by cosine similarity.

    Args:
        project_dir: Root directory to scan.
        query: Free-text search query.
        top_k: Maximum number of results to return.

    Returns:
        List of (file_path, similarity) tuples, best match first.
    """
    log_info(f"Performing semantic search for query: {query}")
    # Bug fix: the original used the raw ollama.embeddings response dict in
    # np.dot below; only the 'embedding' vector is usable for the math.
    query_embedding = np.asarray(ollama.embeddings(
        model='mxbai-embed-large',
        prompt=query
    )['embedding'], dtype=np.float64)
    query_norm = np.linalg.norm(query_embedding)
    similarities = {}

    # Walk the project looking for previously saved embedding files
    for root, _, files in os.walk(project_dir):
        for file in files:
            if file.endswith('.py'):
                embedding_path = os.path.join(root, '.project_docs', f"{file}.embedding.npy")
                if os.path.exists(embedding_path):
                    file_embedding = np.load(embedding_path)
                    # Cosine similarity; skip degenerate zero-norm vectors
                    denom = query_norm * np.linalg.norm(file_embedding)
                    if denom:
                        similarities[os.path.join(root, file)] = np.dot(query_embedding, file_embedding) / denom

    # Sort descending by similarity and keep the top k
    sorted_results = sorted(similarities.items(), key=lambda x: x[1], reverse=True)[:top_k]
    log_success(f"Found {len(sorted_results)} relevant files")
    return sorted_results
150 |
def generate_documentation(analysis_results: List[Dict], model: str = "qwen2.5:14b-instruct-q4_K_M") -> Dict:
    """Generates documentation using LLM.

    Issues three kinds of chat requests to the local Ollama server: a
    project overview, one summary per analyzed file, and a (generic)
    module-interaction description.

    Args:
        analysis_results: Per-file dicts produced by ``analyze_file`` (each
            must contain 'file', 'classes' and 'functions').
        model: Ollama model name used for every request.

    Returns:
        Dict with 'project_overview', 'file_summaries' (path -> {'summary',
        'details'}) and 'module_interactions'. Failed sections are logged
        and left empty; no exception propagates to the caller.
    """
    documentation = {
        "project_overview": "",
        "file_summaries": {},
        "module_interactions": ""
    }

    # Project overview analysis: one prompt listing all files' classes/functions
    log_info("Generating project overview")
    overview_prompt = "Analyze this project structure and provide a comprehensive overview:\n\n"
    for result in analysis_results:
        overview_prompt += f"File: {result['file']}\n"
        # 'Nenhuma' (none) is the placeholder when a list is empty
        overview_prompt += f"Classes: {', '.join([cls['name'] for cls in result['classes']] or ['Nenhuma'])}\n"
        overview_prompt += f"Functions: {', '.join([func['name'] for func in result['functions']] or ['Nenhuma'])}\n\n"

    overview_prompt += "Describe the project's purpose, main components, and how they interact."

    try:
        response: ChatResponse = chat(model=model, messages=[
            {'role':'system', 'content': 'You are an expert in machine learning project analysis.'},
            {'role': 'user', 'content': overview_prompt}
        ])
        documentation["project_overview"] = response.message.content
        log_success("Project overview generated")
    except Exception as e:
        log_error(f"Error generating project overview: {e}")

    # Individual file summaries: one request per file; a failure skips that file only
    log_info("Generating file summaries")
    for result in tqdm(analysis_results, desc="Processing files"):
        try:
            file_summary_prompt = f"Analyze the file {result['file']} and explain its purpose and key components:\n"
            file_summary_prompt += f"Classes: {', '.join([cls['name'] for cls in result['classes']] or ['Nenhuma'])}\n"
            file_summary_prompt += f"Functions: {', '.join([func['name'] for func in result['functions']] or ['Nenhuma'])}\n"

            response: ChatResponse = chat(model=model, messages=[
                {'role':'system', 'content': 'You are an expert in code analysis.'},
                {'role': 'user', 'content': file_summary_prompt}
            ])

            documentation["file_summaries"][result['file']] = {
                "summary": response.message.content,
                "details": result
            }
        except Exception as e:
            log_warning(f"Error generating summary for {result['file']}: {e}")

    # Module interactions
    # NOTE(review): this prompt carries no project data at all, so the model
    # can only answer generically — consider feeding it the overview/summaries.
    log_info("Generating module interaction description")
    try:
        interaction_prompt = "Describe how the modules and components in this project interact with each other."
        response: ChatResponse = chat(model=model, messages=[
            {'role':'system', 'content': 'You are an expert in software architecture.'},
            {'role': 'user', 'content': interaction_prompt}
        ])
        documentation["module_interactions"] = response.message.content
        log_success("Module interaction description generated")
    except Exception as e:
        log_error(f"Error generating module interactions: {e}")

    return documentation
213 |
def save_documentation(documentation: Dict, output_dir: str = "project_docs"):
    """Saves documentation in multiple formats (JSON, Markdown, HTML).

    Args:
        documentation: Output of ``generate_documentation`` with keys
            'project_overview', 'module_interactions' and
            'file_summaries'.
        output_dir: Directory to write the three files into; created
            if it does not exist.

    Each format is saved best-effort: a failure in one writer is logged
    and the remaining formats are still attempted.
    """
    log_info(f"Saving documentation to directory: {output_dir}")

    # Create output directory
    os.makedirs(output_dir, exist_ok=True)

    # Save JSON — keep non-ASCII text (LLM output) readable instead of
    # \uXXXX-escaped.
    json_path = os.path.join(output_dir, "project_documentation.json")
    try:
        with open(json_path, "w", encoding="utf-8") as f:
            json.dump(documentation, f, indent=2, ensure_ascii=False)
        log_success(f"JSON documentation saved to: {json_path}")
    except Exception as e:
        log_error(f"Error saving JSON: {e}")

    # Generate Markdown. Use .get so a partially-populated documentation
    # dict (e.g. an LLM section that failed) still produces a file.
    markdown_content = f"""# Project Documentation

## Project Overview
{documentation.get('project_overview', '')}

## Module Interactions
{documentation.get('module_interactions', '')}

## File Summaries
"""

    for file_path, file_info in documentation.get('file_summaries', {}).items():
        markdown_content += f"""
### {file_path}
{file_info['summary']}

#### Detailed Components
- **Classes**: {', '.join([cls['name'] for cls in file_info['details']['classes']])}
- **Functions**: {', '.join([func['name'] for func in file_info['details']['functions']])}
"""

    # Save Markdown
    md_path = os.path.join(output_dir, "project_documentation.md")
    try:
        with open(md_path, "w", encoding="utf-8") as f:
            f.write(markdown_content)
        log_success(f"Markdown documentation saved to: {md_path}")
    except Exception as e:
        log_error(f"Error saving Markdown: {e}")

    # Convert to HTML and wrap in a minimal valid HTML5 document
    # (charset declared so UTF-8 content renders correctly in browsers).
    try:
        html_content = markdown.markdown(markdown_content)
        html_path = os.path.join(output_dir, "project_documentation.html")
        with open(html_path, "w", encoding="utf-8") as f:
            f.write(f"""<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>Project Documentation</title>
    <style>
        body {{ font-family: sans-serif; max-width: 900px; margin: 2em auto; line-height: 1.5; }}
    </style>
</head>
<body>
{html_content}
</body>
</html>
""")
        log_success(f"HTML documentation saved to: {html_path}")
    except Exception as e:
        log_error(f"Error saving HTML: {e}")
283 |
if __name__ == "__main__":
    import sys

    # Start of execution
    start_total_time = time.time()
    log_info("Starting project analysis")

    # Project directory: taken from the first CLI argument, falling back
    # to the original hard-coded path for backward compatibility.
    default_project_dir = "/home/marcos/projetos_automatizacao/ENTENDER_textgrad/textgrad"
    project_dir = sys.argv[1] if len(sys.argv) > 1 else default_project_dir

    # File collection
    python_files = collect_python_files(project_dir)

    # File analysis (tqdm already reports progress; the old per-file
    # print() mangled the progress bar and was removed)
    log_info("Starting detailed file analysis")
    results = []
    for file in tqdm(python_files, desc="Analyzing files"):
        file_result = analyze_file(file)
        if file_result:
            results.append(file_result)

    # Documentation generation
    log_info("Generating documentation with AI assistant")
    documentation = generate_documentation(results)

    # Saving documentation
    save_documentation(documentation)

    # Total execution time
    end_total_time = time.time()
    log_success(f"Analysis completed in {end_total_time - start_total_time:.2f} seconds")
    print("\nDocumentation generated successfully in 'project_docs' directory!")
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | annotated-types==0.7.0
2 | anyio==4.7.0
3 | certifi==2024.8.30
4 | colorama==0.4.6
5 | distro==1.9.0
6 | exceptiongroup==1.2.2
7 | h11==0.14.0
8 | httpcore==1.0.7
9 | httpx==0.28.1
10 | idna==3.10
11 | jiter==0.8.2
12 | Markdown==3.7
13 | openai==1.57.4
14 | pydantic==2.10.3
15 | pydantic_core==2.27.1
16 | sniffio==1.3.1
17 | tqdm==4.67.1
18 | typing_extensions==4.12.2
ollama
--------------------------------------------------------------------------------