Created
June 11, 2025 17:40
-
-
Save dotemacs/8d292700100e5af342c964276d1f6113 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""
Extract Augment Chat data from VS Code workspace storage.

This script:
1. Takes a workspace directory path
2. Calculates the VS Code workspace storage hash using the correct algorithm
3. Finds the corresponding SQLite database
4. Extracts the Augment chat data
5. Formats it as readable Markdown

Run it like so: ./extract_augment_chat.py /Users/foo/bar/baz -o foo.md
"""
import hashlib | |
import os | |
import subprocess | |
import platform | |
import sqlite3 | |
import json | |
import sys | |
import argparse | |
from datetime import datetime | |
from pathlib import Path | |
def get_vscode_workspace_hash(workspace_path):
    """
    Compute the VS Code workspace storage hash for a folder.

    The digest is MD5(folder_path + str(timestamp_ms)), where timestamp_ms is
    the folder's timestamp in milliseconds: ``st_birthtime`` when the stat
    result exposes it, otherwise ``st_ctime``.

    Args:
        workspace_path: Path of the workspace folder. It is hashed verbatim,
            so it should already be in the form the caller wants to match.

    Returns:
        A ``(hex_digest, timestamp_ms)`` tuple.

    Raises:
        Exception: If the folder cannot be stat'ed. The underlying error is
            attached as ``__cause__``.
    """
    # Keep the try body to the one call that can actually fail.
    try:
        stat_info = os.stat(workspace_path)
    except (OSError, ValueError) as e:
        raise Exception(f"Error getting birthtime for {workspace_path}: {e}") from e

    # NOTE(review): on Unix, st_ctime is the last metadata change, not the
    # creation time, so the fallback digest may not match the directory
    # VS Code created. VS Code reportedly hashes the inode number on Linux --
    # TODO confirm against the VS Code sources before relying on this branch.
    if hasattr(stat_info, 'st_birthtime'):
        created_seconds = stat_info.st_birthtime
    else:
        created_seconds = stat_info.st_ctime
    birthtime_ms = round(created_seconds * 1000)

    # VS Code algorithm: MD5 of path + birthtime_in_milliseconds
    h = hashlib.md5()
    h.update(workspace_path.encode('utf-8'))
    h.update(str(birthtime_ms).encode('utf-8'))
    return h.hexdigest(), birthtime_ms
def get_workspace_storage_path():
    """
    Return the VS Code ``workspaceStorage`` directory for the current platform.

    The application-data root is resolved as follows:

    * macOS: ``~/Library/Application Support``
    * Windows: ``%APPDATA%`` when set, otherwise ``~/AppData/Roaming``
    * anything else (treated as Linux): ``$XDG_CONFIG_HOME`` when set,
      otherwise ``~/.config``

    Returns:
        The path as a string. The directory is not checked for existence.
    """
    system = platform.system()

    if system == 'Darwin':  # macOS
        return os.path.expanduser("~/Library/Application Support/Code/User/workspaceStorage")

    if system == 'Windows':
        # A relocated roaming profile is only visible through %APPDATA%.
        appdata = os.environ.get('APPDATA')
        if appdata:
            return os.path.join(appdata, "Code", "User", "workspaceStorage")
        return os.path.expanduser("~/AppData/Roaming/Code/User/workspaceStorage")

    # Linux and other Unix-likes: honour the XDG base directory override.
    xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
    if xdg_config_home:
        return os.path.join(xdg_config_home, "Code", "User", "workspaceStorage")
    return os.path.expanduser("~/.config/Code/User/workspaceStorage")
def find_workspace_storage_directory(workspace_path):
    """
    Find the workspace storage directory by scanning ``workspace.json`` files.

    Each ``<storage>/<hash>/workspace.json`` is read, and its ``folder`` entry
    (a ``file://`` URL) is compared with ``workspace_path``. This is used as a
    fallback when the calculated hash does not name an existing directory.

    Args:
        workspace_path: Absolute filesystem path of the workspace folder.

    Returns:
        The storage directory containing the matching ``workspace.json``, or
        ``None`` when no entry matches.
    """
    import glob
    from urllib.parse import unquote, urlsplit

    def _comparable(path):
        # Ignore trailing separators everywhere, and case/slash style on Windows.
        return os.path.normcase(os.path.normpath(path))

    target = _comparable(workspace_path)
    storage_base = get_workspace_storage_path()
    pattern = os.path.join(glob.escape(storage_base), "*", "workspace.json")

    for json_file in glob.glob(pattern):
        # Unreadable or malformed files are skipped: this is a best-effort scan.
        try:
            with open(json_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (OSError, ValueError):
            continue

        folder_url = data.get('folder') if isinstance(data, dict) else None
        if not isinstance(folder_url, str):
            continue

        url_parts = urlsplit(folder_url)
        if url_parts.scheme != 'file':
            continue

        # The URL is percent-encoded (e.g. spaces as %20); decode before comparing.
        folder_path = unquote(url_parts.path)
        # Windows URLs look like file:///c:/dir -- drop the slash before the drive.
        if os.name == 'nt' and len(folder_path) >= 3 and folder_path[0] == '/' and folder_path[2] == ':':
            folder_path = folder_path[1:]

        if _comparable(folder_path) == target:
            return os.path.dirname(json_file)

    return None
def extract_augment_chat_data(workspace_path):
    """
    Extract the raw Augment chat payload from VS Code workspace storage.

    The storage directory is located by the calculated workspace hash first
    and, failing that, by searching the ``workspace.json`` files. The value
    stored under ``memento/webviewView.augment-chat`` in ``state.vscdb`` is
    then read and returned.

    Progress messages go to stderr so that stdout carries only the data the
    caller chooses to print.

    Args:
        workspace_path: Absolute path of the workspace folder.

    Returns:
        The stored value as a string (raw JSON).

    Raises:
        Exception: If no storage directory or database is found, the key is
            absent, or SQLite reports an error (chained as ``__cause__``).
    """
    def log(message=""):
        print(message, file=sys.stderr)

    # First try to calculate the workspace hash
    try:
        workspace_hash, birthtime_ms = get_vscode_workspace_hash(workspace_path)
    except Exception as e:
        log(f"Warning: Could not calculate hash: {e}")
        workspace_hash = None
    else:
        log(f"Workspace path: {workspace_path}")
        log(f"Birthtime (ms): {birthtime_ms}")
        log(f"Calculated hash: {workspace_hash}")

    # Try to find the storage directory by calculated hash first
    storage_dir = None
    if workspace_hash:
        calculated_storage_dir = os.path.join(get_workspace_storage_path(), workspace_hash)
        if os.path.exists(calculated_storage_dir):
            storage_dir = calculated_storage_dir
            log(f"Found storage directory using calculated hash: {storage_dir}")

    # If calculated hash doesn't work, search for the directory
    if not storage_dir:
        log("Calculated hash directory not found, searching for workspace...")
        storage_dir = find_workspace_storage_directory(workspace_path)
        if storage_dir:
            log(f"Found storage directory by search: {storage_dir}")
            log(f"Actual hash: {os.path.basename(storage_dir)}")

    if not storage_dir:
        raise Exception(f"No workspace storage directory found for: {workspace_path}")

    sqlite_db = os.path.join(storage_dir, "state.vscdb")
    log(f"SQLite database: {sqlite_db}")
    log()

    # sqlite3.connect() would silently create a missing file, so check first.
    if not os.path.exists(sqlite_db):
        raise Exception(f"SQLite database not found: {sqlite_db}")

    try:
        conn = sqlite3.connect(sqlite_db)
        try:
            row = conn.execute(
                "SELECT value FROM ItemTable WHERE key = 'memento/webviewView.augment-chat'"
            ).fetchone()
        finally:
            # Close even when the query fails, so the database is not left open.
            conn.close()
    except sqlite3.Error as e:
        raise Exception(f"SQLite error: {e}") from e

    if row is None:
        raise Exception("No Augment chat data found in the database")

    value = row[0]
    # A value stored as a BLOB comes back as bytes; callers expect text.
    if isinstance(value, bytes):
        value = value.decode('utf-8')
    return value
def format_timestamp(iso_string):
    """
    Convert an ISO 8601 timestamp to ``YYYY-MM-DD HH:MM:SS UTC``.

    A trailing ``Z`` is accepted. Timestamps carrying a UTC offset are
    converted to UTC before formatting, so the ``UTC`` label is accurate.
    Timestamps without an offset are formatted unchanged.

    Args:
        iso_string: The timestamp text.

    Returns:
        The formatted timestamp, or ``iso_string`` unchanged when it cannot
        be parsed (including non-string input such as ``None``).
    """
    from datetime import timezone

    try:
        dt = datetime.fromisoformat(iso_string.replace('Z', '+00:00'))
        if dt.tzinfo is not None:
            dt = dt.astimezone(timezone.utc)
        return dt.strftime('%Y-%m-%d %H:%M:%S UTC')
    except (AttributeError, TypeError, ValueError, OverflowError):
        # Best effort: show the original value rather than fail the export.
        return iso_string
def format_augment_chat_data(raw_json_data):
    """
    Format the Augment chat data as readable Markdown.

    ``raw_json_data`` is a JSON document whose ``webviewState`` member is
    itself a JSON-encoded string holding ``currentConversationId`` and a
    ``conversations`` mapping. Conversation and message fields are optional:
    a field that is absent is left out of the output.

    Args:
        raw_json_data: The raw JSON text read from the database.

    Returns:
        The Markdown document as a single string.

    Raises:
        KeyError: If ``webviewState`` is missing.
        ValueError: If either JSON layer cannot be parsed.
    """
    # Parse the JSON data (the webview state is JSON nested inside JSON)
    data = json.loads(raw_json_data)
    webview_state = json.loads(data['webviewState'])

    output = ['# Augment Chat Data', '']
    if 'currentConversationId' in webview_state:
        output.append(f'**Current Conversation ID:** `{webview_state["currentConversationId"]}`')
        output.append('')
    output.append('## Conversations')
    output.append('')

    conversations = webview_state.get('conversations') or {}
    for conv_id, conv in conversations.items():
        output.append(f'### Conversation: `{conv_id}`')
        # Conversation metadata is optional, like the per-message fields below.
        if 'createdAtIso' in conv:
            output.append(f'- **Created:** {format_timestamp(conv["createdAtIso"])}')
        if 'lastInteractedAtIso' in conv:
            output.append(f'- **Last Interacted:** {format_timestamp(conv["lastInteractedAtIso"])}')
        if 'isPinned' in conv:
            output.append(f'- **Pinned:** {conv["isPinned"]}')
        if 'isShareable' in conv:
            output.append(f'- **Shareable:** {conv["isShareable"]}')
        output.append('')

        chat_history = conv.get('chatHistory')
        if not chat_history:
            output.append('*No chat history*')
            output.append('')
            continue

        output.append('#### Chat History:')
        output.append('')
        for i, msg in enumerate(chat_history, 1):
            output.append(f'**Message {i}:**')
            # Handle optional fields gracefully
            if 'status' in msg:
                output.append(f'- **Status:** {msg["status"]}')
            if 'request_id' in msg:
                output.append(f'- **Request ID:** `{msg["request_id"]}`')
            if 'timestamp' in msg:
                output.append(f'- **Timestamp:** {format_timestamp(msg["timestamp"])}')
            if 'request_message' in msg:
                output.append(f'- **Request:** {msg["request_message"]}')
            output.append('')
            output.append('**Response:**')
            output.append('')
            # The response text is emitted verbatim so its own Markdown renders.
            output.append(msg.get('response_text', 'No response text available'))
            output.append('')
            output.append('---')
            output.append('')

    return '\n'.join(output)
def main():
    """
    Command-line entry point.

    Parses the arguments, validates that the workspace path is an existing
    directory, extracts the chat data, and either writes it to the ``-o``
    file or prints it to stdout. With ``--raw`` the stored JSON is emitted
    as-is instead of formatted Markdown.

    Exits with status 1, after printing a message to stderr, when the path
    is invalid or extraction/formatting fails.
    """
    parser = argparse.ArgumentParser(description='Extract Augment Chat data from VS Code workspace storage')
    parser.add_argument('workspace_path', help='Path to the workspace directory')
    parser.add_argument('-o', '--output', help='Output file (default: print to stdout)')
    parser.add_argument('--raw', action='store_true', help='Output raw JSON instead of formatted Markdown')
    args = parser.parse_args()

    # Resolve the workspace path to absolute path
    workspace_path = os.path.abspath(args.workspace_path)

    if not os.path.exists(workspace_path):
        print(f"Error: Workspace path does not exist: {workspace_path}", file=sys.stderr)
        sys.exit(1)
    if not os.path.isdir(workspace_path):
        print(f"Error: Workspace path is not a directory: {workspace_path}", file=sys.stderr)
        sys.exit(1)

    try:
        # Extract the raw data, then format it unless raw output was requested
        raw_data = extract_augment_chat_data(workspace_path)
        output_data = raw_data if args.raw else format_augment_chat_data(raw_data)

        # Output the result
        if args.output:
            # Chat text routinely contains non-ASCII characters; write UTF-8
            # explicitly instead of depending on the platform default encoding.
            with open(args.output, 'w', encoding='utf-8') as f:
                f.write(output_data)
            print(f"Output written to: {args.output}")
        else:
            print(output_data)
    except Exception as e:
        # Top-level boundary: report any failure and exit non-zero.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
# Run the command-line interface only when executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment