Skip to content

Instantly share code, notes, and snippets.

@dotemacs
Created June 11, 2025 17:40
Show Gist options
  • Save dotemacs/8d292700100e5af342c964276d1f6113 to your computer and use it in GitHub Desktop.
Save dotemacs/8d292700100e5af342c964276d1f6113 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
"""
Extract Augment Chat data from VS Code workspace storage.
This script:
1. Takes a workspace directory path
2. Calculates the VS Code workspace storage hash using the correct algorithm
3. Finds the corresponding SQLite database
4. Extracts the Augment chat data
5. Formats it as readable Markdown
Run it like so: ./extract_augment_chat.py /Users/foo/bar/baz -o foo.md
"""
import hashlib
import os
import subprocess
import platform
import sqlite3
import json
import sys
import argparse
from datetime import datetime
from pathlib import Path
def get_vscode_workspace_hash(workspace_path):
    """
    Compute the workspace storage hash for a workspace folder.

    Algorithm: MD5(folder_path + round(creation_time_seconds * 1000)), where
    the creation time is ``st_birthtime`` when the platform's stat result
    exposes it and ``st_ctime`` otherwise.

    Args:
        workspace_path: Path of the workspace folder. It is hashed verbatim,
            so it must be spelled the same way as the path being looked up.

    Returns:
        A ``(hex_digest, timestamp_ms)`` tuple: the MD5 hex digest and the
        millisecond timestamp that was mixed into it.

    Raises:
        Exception: If the folder cannot be stat'ed. The underlying error is
            attached as ``__cause__``.
    """
    # Only the stat call can fail here, so keep the try body that small.
    try:
        stat_info = os.stat(workspace_path)
    except (OSError, ValueError, TypeError) as e:
        raise Exception(f"Error getting birthtime for {workspace_path}: {e}") from e

    # NOTE(review): where st_birthtime is unavailable (typically Linux) this
    # falls back to st_ctime. VS Code may salt the hash differently on such
    # platforms -- confirm; callers should be ready for the calculated hash
    # not to match an existing storage directory.
    if hasattr(stat_info, 'st_birthtime'):
        created_seconds = stat_info.st_birthtime
    else:
        created_seconds = stat_info.st_ctime
    birthtime_ms = round(created_seconds * 1000)

    # MD5 of path + creation time in milliseconds (as a decimal string).
    h = hashlib.md5()
    h.update(workspace_path.encode('utf-8'))
    h.update(str(birthtime_ms).encode('utf-8'))
    return h.hexdigest(), birthtime_ms
def get_workspace_storage_path():
    """Return the VS Code ``workspaceStorage`` directory for the current OS."""
    # Locations keyed by platform.system(); "~" is expanded below.
    per_os_location = {
        'Darwin': "~/Library/Application Support/Code/User/workspaceStorage",  # macOS
        'Windows': "~/AppData/Roaming/Code/User/workspaceStorage",
    }
    # Every other system is treated like Linux.
    linux_location = "~/.config/Code/User/workspaceStorage"
    chosen = per_os_location.get(platform.system(), linux_location)
    return os.path.expanduser(chosen)
def _folder_uri_to_path(folder_uri):
    """Turn a ``file://`` folder URI into a normalized local path, else None."""
    from urllib.parse import unquote, urlsplit

    if not isinstance(folder_uri, str):
        return None
    parts = urlsplit(folder_uri)
    if parts.scheme != 'file':
        return None
    # The URI is percent-encoded (e.g. a space is "%20"); decode it so it can
    # be compared against a real filesystem path.
    path = unquote(parts.path)
    # Windows drive paths appear as "/c:/..." inside a file URI.
    if os.name == 'nt' and len(path) >= 3 and path[0] == '/' and path[2] == ':':
        path = path[1:]
    return os.path.normcase(os.path.normpath(path))


def find_workspace_storage_directory(workspace_path, storage_base=None):
    """
    Find the workspace storage directory by searching workspace.json files.

    Each ``<storage_base>/*/workspace.json`` is read; the first one whose
    ``folder`` entry is a ``file://`` URI that resolves to ``workspace_path``
    wins. Files that cannot be read or parsed are skipped.

    Args:
        workspace_path: Path of the workspace folder to look for.
        storage_base: Directory holding the per-workspace storage folders.
            Defaults to ``get_workspace_storage_path()``.

    Returns:
        The directory containing the matching workspace.json, or None when no
        entry matches.
    """
    import glob

    if storage_base is None:
        storage_base = get_workspace_storage_path()
    wanted = os.path.normcase(os.path.normpath(workspace_path))

    # Escape the base so glob metacharacters in it are taken literally.
    pattern = os.path.join(glob.escape(storage_base), "*", "workspace.json")
    for json_file in glob.glob(pattern):
        try:
            with open(json_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (OSError, ValueError):
            # Unreadable or malformed entry (JSON and decode errors are
            # ValueError subclasses): ignore it and keep searching.
            continue
        if not isinstance(data, dict):
            continue
        if _folder_uri_to_path(data.get('folder')) == wanted:
            return os.path.dirname(json_file)
    return None
def extract_augment_chat_data(workspace_path):
    """
    Extract Augment chat data from VS Code workspace storage.

    The storage directory is located first by the calculated workspace hash
    and, if that directory does not exist, by searching the workspace.json
    files. The chat data is then read from ``state.vscdb`` in that directory.

    Progress messages are written to stderr so that stdout carries only the
    extracted data when the caller prints it.

    Args:
        workspace_path: Absolute path of the workspace folder.

    Returns:
        The raw value stored under the ``memento/webviewView.augment-chat``
        key of the ``ItemTable`` table.

    Raises:
        Exception: If no storage directory or database is found, the key is
            absent, or SQLite reports an error (chained as ``__cause__``).
    """
    def log(message=""):
        print(message, file=sys.stderr)

    # First try to calculate the workspace hash.
    try:
        workspace_hash, birthtime_ms = get_vscode_workspace_hash(workspace_path)
    except Exception as e:
        log(f"Warning: Could not calculate hash: {e}")
        workspace_hash = None
    else:
        log(f"Workspace path: {workspace_path}")
        log(f"Birthtime (ms): {birthtime_ms}")
        log(f"Calculated hash: {workspace_hash}")

    # Try the directory named after the calculated hash first.
    storage_base = get_workspace_storage_path()
    storage_dir = None
    if workspace_hash:
        calculated_storage_dir = os.path.join(storage_base, workspace_hash)
        if os.path.exists(calculated_storage_dir):
            storage_dir = calculated_storage_dir
            log(f"Found storage directory using calculated hash: {storage_dir}")

    # If the calculated hash doesn't work, search for the directory.
    if not storage_dir:
        log("Calculated hash directory not found, searching for workspace...")
        storage_dir = find_workspace_storage_directory(workspace_path)
        if storage_dir:
            actual_hash = os.path.basename(storage_dir)
            log(f"Found storage directory by search: {storage_dir}")
            log(f"Actual hash: {actual_hash}")
    if not storage_dir:
        raise Exception(f"No workspace storage directory found for: {workspace_path}")

    sqlite_db = os.path.join(storage_dir, "state.vscdb")
    log(f"SQLite database: {sqlite_db}")
    log()

    # Check first: sqlite3.connect() would silently create a missing file.
    if not os.path.exists(sqlite_db):
        raise Exception(f"SQLite database not found: {sqlite_db}")

    try:
        conn = sqlite3.connect(sqlite_db)
        try:
            cursor = conn.cursor()
            cursor.execute("SELECT value FROM ItemTable WHERE key = 'memento/webviewView.augment-chat'")
            result = cursor.fetchone()
        finally:
            # Close even when the query fails so the handle is not leaked.
            conn.close()
    except sqlite3.Error as e:
        raise Exception(f"SQLite error: {e}") from e

    if result is None:
        raise Exception("No Augment chat data found in the database")
    return result[0]
def format_timestamp(iso_string):
    """
    Convert an ISO 8601 timestamp to ``YYYY-MM-DD HH:MM:SS UTC``.

    A trailing ``Z`` is accepted. Timestamps carrying a UTC offset are
    converted to UTC before formatting so the ``UTC`` label is accurate;
    timestamps without an offset are formatted as given.

    Args:
        iso_string: The timestamp text.

    Returns:
        The formatted string, or ``iso_string`` unchanged when it cannot be
        parsed (including non-string input such as None).
    """
    from datetime import timezone

    try:
        dt = datetime.fromisoformat(iso_string.replace('Z', '+00:00'))
        if dt.tzinfo is not None:
            # Normalize e.g. "+02:00" input so the clock time matches the label.
            dt = dt.astimezone(timezone.utc)
    except (AttributeError, TypeError, ValueError, OverflowError):
        # Best effort: show whatever was stored rather than failing the export.
        return iso_string
    return dt.strftime('%Y-%m-%d %H:%M:%S UTC')
def format_augment_chat_data(raw_json_data):
    """
    Format the Augment chat data as readable Markdown.

    ``raw_json_data`` is a JSON document whose ``webviewState`` entry is itself
    a JSON-encoded string; that inner document supplies
    ``currentConversationId`` and the ``conversations`` mapping.

    Conversation and message fields are all treated as optional: a field that
    is absent is left out of the output instead of aborting the export.

    Args:
        raw_json_data: The JSON text read from the database.

    Returns:
        The Markdown document as a single string.

    Raises:
        KeyError: If ``webviewState`` is missing.
        ValueError: If either JSON layer cannot be decoded.
    """
    data = json.loads(raw_json_data)
    webview_state = json.loads(data['webviewState'])

    output = ['# Augment Chat Data', '']
    if 'currentConversationId' in webview_state:
        output.append(f'**Current Conversation ID:** `{webview_state["currentConversationId"]}`')
        output.append('')
    output.append('## Conversations')
    output.append('')

    conversations = webview_state.get('conversations') or {}
    for conv_id, conv in conversations.items():
        output.append(f'### Conversation: `{conv_id}`')
        for key, label in (('createdAtIso', 'Created'),
                           ('lastInteractedAtIso', 'Last Interacted')):
            if key in conv:
                output.append(f'- **{label}:** {format_timestamp(conv[key])}')
        for key, label in (('isPinned', 'Pinned'), ('isShareable', 'Shareable')):
            if key in conv:
                output.append(f'- **{label}:** {conv[key]}')
        output.append('')

        chat_history = conv.get('chatHistory')
        if not chat_history:
            output.append('*No chat history*')
            output.append('')
            continue

        output.append('#### Chat History:')
        output.append('')
        for i, msg in enumerate(chat_history, 1):
            output.append(f'**Message {i}:**')
            if 'status' in msg:
                output.append(f'- **Status:** {msg["status"]}')
            if 'request_id' in msg:
                output.append(f'- **Request ID:** `{msg["request_id"]}`')
            if 'timestamp' in msg:
                output.append(f'- **Timestamp:** {format_timestamp(msg["timestamp"])}')
            if 'request_message' in msg:
                output.append(f'- **Request:** {msg["request_message"]}')
            output.append('')
            output.append('**Response:**')
            output.append('')
            # The response is emitted verbatim so its own Markdown renders.
            # A null response is replaced too, since join() needs strings.
            response_text = msg.get('response_text')
            if response_text is None:
                response_text = 'No response text available'
            output.append(str(response_text))
            output.append('')
            output.append('---')
            output.append('')
    return '\n'.join(output)
def main():
    """
    Command-line entry point.

    Parses the arguments, validates that the workspace path is an existing
    directory, extracts the chat data and emits it either as formatted
    Markdown or, with ``--raw``, as the stored JSON. The result goes to the
    ``-o/--output`` file when given, otherwise to stdout. Any failure is
    reported on stderr and the process exits with status 1.
    """
    parser = argparse.ArgumentParser(description='Extract Augment Chat data from VS Code workspace storage')
    parser.add_argument('workspace_path', help='Path to the workspace directory')
    parser.add_argument('-o', '--output', help='Output file (default: print to stdout)')
    parser.add_argument('--raw', action='store_true', help='Output raw JSON instead of formatted Markdown')
    args = parser.parse_args()

    # Resolve the workspace path to an absolute path.
    workspace_path = os.path.abspath(args.workspace_path)
    if not os.path.exists(workspace_path):
        print(f"Error: Workspace path does not exist: {workspace_path}", file=sys.stderr)
        sys.exit(1)
    if not os.path.isdir(workspace_path):
        print(f"Error: Workspace path is not a directory: {workspace_path}", file=sys.stderr)
        sys.exit(1)

    try:
        raw_data = extract_augment_chat_data(workspace_path)
        if args.raw:
            output_data = raw_data
        else:
            output_data = format_augment_chat_data(raw_data)

        if args.output:
            # Chat text routinely contains non-ASCII characters; write UTF-8
            # explicitly instead of relying on the locale's default encoding.
            with open(args.output, 'w', encoding='utf-8') as f:
                f.write(output_data)
            print(f"Output written to: {args.output}")
        else:
            print(output_data)
    except Exception as e:
        # Top-level boundary: turn any failure into a clean CLI error.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment