Created
October 15, 2024 20:20
-
-
Save 0bserver07/9e8431fad998eac2d785d29eea6b73d4 to your computer and use it in GitHub Desktop.
Python Package Parser For LLMs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# NOTE: replace the 'PACKAGE_NAME' placeholder used in analyze_package() with the name of your package's top-level directory.
import os | |
import ast | |
import json | |
def parse_package(package_root):
    """
    Walk ``package_root`` and return a nested dict mirroring its directory tree.

    Each directory becomes a nested dict reached through the components of its
    path relative to ``package_root``; the root directory itself is stored
    under the key ``'.'`` (what ``os.path.relpath`` yields for it). Inside a
    directory dict:

    * every ``*.py`` file maps to
      ``{'content': <text>, 'ast': parse_ast(<text>)}``
    * README.md, LICENSE, CHANGELOG.md and CONTRIBUTING.md map to
      ``{'content': <text>}``

    All other files are ignored.

    Files are decoded as UTF-8 and undecodable bytes are replaced with
    U+FFFD, so a single badly-encoded file cannot abort the whole walk and the
    result does not depend on the platform's default encoding.
    """
    # 'setup.py' needs no entry here: it already matches the '.py' rule.
    doc_files = ('README.md', 'LICENSE', 'CHANGELOG.md', 'CONTRIBUTING.md')

    package_structure = {}
    for root, _dirs, files in os.walk(package_root):
        # Descend to (creating as needed) the dict that represents `root`.
        relative_path = os.path.relpath(root, package_root)
        current_level = package_structure
        for part in relative_path.split(os.sep):
            current_level = current_level.setdefault(part, {})

        for file in files:
            is_python = file.endswith('.py')
            if not (is_python or file in doc_files):
                continue
            file_path = os.path.join(root, file)
            with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
                content = f.read()
            entry = {'content': content}
            if is_python:
                entry['ast'] = parse_ast(content)
            current_level[file] = entry
    return package_structure
def parse_ast(content):
    """
    Parse Python source text and summarise its imports, functions and classes.

    Returns a dict with the keys ``'imports'``, ``'functions'`` and
    ``'classes'`` (produced by ``extract_imports``, ``extract_functions`` and
    ``extract_classes``), or ``{'error': 'Unable to parse AST'}`` when the
    text cannot be parsed.
    """
    try:
        tree = ast.parse(content)
    except (SyntaxError, ValueError):
        # ValueError: Python versions before 3.12 raise it (instead of
        # SyntaxError) when the source contains null bytes.
        return {'error': 'Unable to parse AST'}
    return {
        'imports': extract_imports(tree),
        'functions': extract_functions(tree),
        'classes': extract_classes(tree)
    }
def extract_imports(tree):
    """
    Collect the names imported anywhere in ``tree``.

    ``import a.b`` contributes ``'a.b'``. ``from m import n`` contributes
    ``'m.n'``. Relative imports keep their leading dots so they cannot be
    mistaken for absolute ones: ``from . import n`` gives ``'.n'`` and
    ``from ..m import n`` gives ``'..m.n'``.

    Returns the names as a list, in ``ast.walk`` order.
    """
    imports = []
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            imports.extend(alias.name for alias in node.names)
        elif isinstance(node, ast.ImportFrom):
            # `level` is the number of leading dots of a relative import
            # (0 for absolute imports).
            base = '.' * (node.level or 0) + (node.module or '')
            # No extra separator when there is no module part
            # (e.g. `from . import n`), the dots already act as one.
            separator = '.' if node.module else ''
            imports.extend(
                f"{base}{separator}{alias.name}" for alias in node.names
            )
    return imports
def extract_functions(tree):
    """
    Describe every function definition found anywhere in ``tree``.

    Both ``def`` and ``async def`` definitions are reported, including
    methods and nested functions. Each is a dict with:

    * ``'name'``: the function name
    * ``'args'``: parameter names in signature order; the variadic
      parameters are written as ``'*name'`` and ``'**name'``
    * ``'docstring'``: the docstring, or ``None`` if there is none
    """
    functions = []
    for node in ast.walk(tree):
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            functions.append({
                'name': node.name,
                'args': _parameter_names(node.args),
                'docstring': ast.get_docstring(node)
            })
    return functions


def _parameter_names(arguments):
    """Return every parameter name of an ``ast.arguments`` node, in signature order."""
    # `posonlyargs` only exists on Python 3.8+, hence the getattr.
    names = [arg.arg for arg in getattr(arguments, 'posonlyargs', [])]
    names.extend(arg.arg for arg in arguments.args)
    if arguments.vararg is not None:
        names.append('*' + arguments.vararg.arg)
    names.extend(arg.arg for arg in arguments.kwonlyargs)
    if arguments.kwarg is not None:
        names.append('**' + arguments.kwarg.arg)
    return names
def extract_classes(tree):
    """
    Describe every class definition found anywhere in ``tree``.

    Each class is a dict with:

    * ``'name'``: the class name
    * ``'methods'``: names of the ``def`` and ``async def`` functions defined
      directly in the class body, in source order
    * ``'docstring'``: the class docstring, or ``None`` if there is none
    """
    method_types = (ast.FunctionDef, ast.AsyncFunctionDef)
    classes = []
    for node in ast.walk(tree):
        if isinstance(node, ast.ClassDef):
            classes.append({
                'name': node.name,
                'methods': [
                    member.name for member in node.body
                    if isinstance(member, method_types)
                ],
                'docstring': ast.get_docstring(node)
            })
    return classes
def analyze_package(package_root, package_name='PACKAGE_NAME', tests_dir='tests'):
    """
    Parse the package under ``package_root`` and add a short summary.

    Args:
        package_root: directory to analyse.
        package_name: name of the top-level directory holding the package's
            modules. Defaults to the ``'PACKAGE_NAME'`` placeholder, so pass
            the real name instead of editing this function.
        tests_dir: name of the top-level directory holding the tests.

    Returns:
        A dict with ``'structure'`` (the result of ``parse_package``) and
        ``'summary'``, which contains:

        * ``'total_files'``: number of ``*.py`` files anywhere under
          ``package_root``
        * ``'total_modules'``: number of entries directly recorded for
          ``package_name``
        * ``'main_modules'``: the names of those entries
        * ``'test_files'``: the names of the entries directly recorded for
          ``tests_dir``
    """
    package_structure = parse_package(package_root)

    python_file_count = sum(
        1
        for _root, _dirs, file_names in os.walk(package_root)
        for file_name in file_names
        if file_name.endswith('.py')
    )
    # Missing directories simply yield empty summaries.
    main_package = package_structure.get(package_name, {})
    tests = package_structure.get(tests_dir, {})

    # Analyze the package structure and prepare context for LLM
    analysis = {
        'structure': package_structure,
        'summary': {
            'total_files': python_file_count,
            'total_modules': len(main_package),
            'main_modules': list(main_package.keys()),
            'test_files': list(tests.keys())
        }
    }
    return analysis
if __name__ == '__main__':
    # The current working directory is taken to be the package root.
    package_root = '.'
    # Emit the full analysis on stdout as indented JSON so it can be
    # captured (e.g. redirected into a file) for later processing.
    analysis = analyze_package(package_root)
    print(json.dumps(analysis, indent=2))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# NOTE: replace the value of OLLAMA_API_URL below with the URL of your Ollama server.
import json | |
import requests | |
import os | |
# Read the previously generated package analysis from disk
with open('analysis.json', 'r') as analysis_file:
    analysis = json.load(analysis_file)

# Endpoint of the Ollama text-generation API
OLLAMA_API_URL = "https://XXXXXXXXXXXXX/api/generate"  # Adjust this URL if needed
def send_to_ollama(content, file_name, model="gemma2:9b", timeout=600):
    """
    Send the file content to Ollama API and return the response.

    Args:
        content: text of the file to analyse.
        file_name: name of the file, used in the prompt and in error messages.
        model: name of the Ollama model to query.
        timeout: seconds to wait for the server, passed to ``requests.post``;
            ``None`` waits indefinitely. A non-streaming request only answers
            once generation has finished, hence the generous default.

    Returns:
        The text in the ``'response'`` field of the API reply, or ``None`` if
        the request failed or the reply did not have the expected shape (the
        problem is printed).
    """
    prompt = (
        "You are an expert in Python and writing python libraries for data, minimal. "
        f"Analyze the following Python code from the file {file_name}:\n\n{content}\n\n"
        ", Explain what the functions do each, and provide feedback on code quality, "
        "suggest improvements, and if necessary, provide a rewritten version."
    )
    payload = {
        "model": model,
        "prompt": prompt,
        "stream": False
    }
    try:
        # Without a timeout an unresponsive server would block forever.
        response = requests.post(OLLAMA_API_URL, json=payload, timeout=timeout)
        response.raise_for_status()
        return response.json()['response']
    except requests.RequestException as e:
        print(f"Error sending request to Ollama API for {file_name}: {e}")
    except (ValueError, KeyError, TypeError) as e:
        # Body was not JSON (older requests versions raise ValueError) or the
        # JSON lacked the 'response' field.
        print(f"Unexpected response from Ollama API for {file_name}: {e!r}")
    return None
def process_files(structure, base_path="experiment_kayos_data_interpreter_XNUMBER"):
    """
    Recursively process files in the structure and send their content to Ollama.
    Store the results in a folder with corresponding file names.

    Args:
        structure: nested dict in which a file is a dict whose ``'content'``
            value is a string, and any other dict is a directory mapping
            names to further entries. Non-dict values are ignored.
        base_path: directory that receives the results. It is created if
            missing, and every directory in ``structure`` is recreated
            beneath it.

    Each successful Ollama response is printed and written, UTF-8 encoded, to
    a file named after the analysed file. Files for which no response was
    obtained are skipped.
    """
    os.makedirs(base_path, exist_ok=True)
    for key, value in structure.items():
        if not isinstance(value, dict):
            continue
        # A file carries its text under 'content'. Requiring a string keeps a
        # directory that merely contains an entry named 'content' from being
        # mistaken for a file.
        if isinstance(value.get('content'), str):
            print(f"Processing file: {key}")
            ollama_response = send_to_ollama(value['content'], key)
            if not ollama_response:
                continue
            print(f"Ollama's analysis for {key}:")
            print(ollama_response)
            print("\n" + "="*50 + "\n")
            # Save the response to a file; explicit UTF-8 because model
            # output is not limited to the platform's default encoding.
            file_path = os.path.join(base_path, key)
            with open(file_path, 'w', encoding='utf-8') as f:
                f.write(ollama_response)
        else:
            # This is a directory
            process_files(value, os.path.join(base_path, key))
# Kick off the traversal at the top level of the loaded structure,
# writing results under the default output directory
process_files(structure=analysis['structure'])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment