Last active
October 1, 2021 21:22
-
-
Save dkapila/479232696644bf6e8711bc6e2a752174 to your computer and use it in GitHub Desktop.
Prepare Slack Export for Roam/Logseq/Obsidian
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import json | |
import glob | |
import os | |
from pathlib import Path | |
import re | |
import datetime | |
from datetime import timezone | |
import pytz | |
# Directory containing the unzipped Slack export (channels.json, users.json,
# plus one sub-directory of daily JSON files per channel).
SLACK_EXPORT_FROM_PATH = './slack_export'
# Directory the per-day markdown files are written to; assumed to exist.
SLACK_EXPORT_TO_PATH = './output'
# When True, render usernames with Roam cloze syntax: {{=: @name | real name}}.
USE_CLOZE_SYNTAX_FOR_USERNAMES = False
# When True, daily pages are named like 'October 1st, 2021'; else '2021_10_01'.
USE_CARDINAL_DATES = False
# Accumulates [date, channel, parsed-json] rows while reading the export.
combined_data = []
# Slack user id -> {'name', 'real_name'}, filled by set_user_names().
user_names = {}
# Module-level copy of the message DataFrame; get_replies() reads it to
# resolve thread replies across days. Populated by run().
df_copy = pd.DataFrame(columns=['Date', 'Channel', 'Data'])
## List of available timezones: https://stackoverflow.com/questions/13866926/is-there-a-list-of-pytz-timezones
slack_time_zone = pytz.timezone('America/Los_Angeles')
# Slack user ids whose messages and replies should be excluded from output.
user_ids_to_remove = {}
def run():
    """Top-level driver: read the Slack export and emit one markdown file per day."""
    print("Getting Channel names...")
    channel_names = get_channel_names()
    print("Getting User names...")
    set_user_names()
    print("Preparing Data...")
    for idx, val in enumerate(channel_names):
        add_daily_comments_from_channel(val)
    print("Creating Markdown Pages...")
    global df_copy
    # df_copy must be populated BEFORE the groupby below runs, because
    # buildDailyPages -> get_replies() reads the global df_copy to resolve
    # thread replies that may live on a different day/channel page.
    df_copy = pd.DataFrame(combined_data, columns=['Date', 'Channel', 'Data'])
    df = pd.DataFrame(combined_data, columns=['Date', 'Channel', 'Data'])
    df.groupby('Date').apply(buildDailyPages)
    print("Export complete. All markdown files are in " + SLACK_EXPORT_TO_PATH)
def get_channel_names():
    """Return the name of every channel listed in the export's channels.json.

    Reads SLACK_EXPORT_FROM_PATH/channels.json, a JSON array of channel
    objects, and extracts each object's 'name' field.
    """
    json_file_path = SLACK_EXPORT_FROM_PATH + '/channels.json'
    with open(json_file_path, 'r') as j:
        # json.load reads the file handle directly; no need for loads(read()).
        contents = json.load(j)
    return [channel['name'] for channel in contents]
def set_user_names():
    """Populate the global user_names map from the export's users.json.

    Maps each Slack user id to {'name': ..., 'real_name': ...}. 'real_name'
    falls back to '' when the profile omits it (bots, deleted users, etc.).
    """
    json_file_path = SLACK_EXPORT_FROM_PATH + '/users.json'
    with open(json_file_path, 'r') as j:
        contents = json.load(j)
    for user in contents:
        user_names[user['id']] = {
            'name': user['name'],
            # .get replaces the manual "if 'real_name' in key" dance.
            'real_name': user.get('real_name', ''),
        }
def add_daily_comments_from_channel(channel):
    """Load every daily JSON file for `channel` into the global combined_data.

    Slack exports one JSON file per channel per day, named YYYY-MM-DD.json;
    the file stem therefore doubles as the message date. Appends one
    [date, channel, parsed_message_list] row per file.
    """
    files_in_channel = glob.glob(SLACK_EXPORT_FROM_PATH + "/" + channel + "/*.json")
    # The enumerate() index in the original was never used — plain iteration.
    for file_path in files_in_channel:
        with open(file_path, 'r') as j:
            file_name = Path(file_path).stem
            contents = json.load(j)
        combined_data.append([file_name, channel, contents])
def clean_up_text(text, user_id):
    """Format one Slack message: prefix the author's name, expand mentions.

    text: raw Slack message text, possibly containing <@USERID> tokens.
    user_id: Slack id of the message author; unknown ids get no prefix.
    Returns a single-line string (embedded newlines are removed).
    """
    user_name_text = ''
    if user_id in user_names:
        user_name = user_names[user_id]['name']
        real_name = user_names[user_id]['real_name']
        if (USE_CLOZE_SYNTAX_FOR_USERNAMES):
            # Roam cloze syntax hides the real name behind the @handle.
            user_name_text = "{{=: @" + user_name + " | " + real_name + "}} : "
        else:
            user_name_text = user_name + "(" + real_name + ") : "
    # Expand every <@USERID> mention embedded in the message body.
    users = re.findall(r"\<@U([A-Za-z0-9_]+)\>", text)
    for user in users:
        user_name_mentioned_text = ''
        user_id = 'U' + user
        if user_id in user_names:
            user_name = user_names[user_id]['name']
            real_name = user_names[user_id]['real_name']
            if (USE_CLOZE_SYNTAX_FOR_USERNAMES):
                user_name_mentioned_text = "{{=: @" + user_name + " | " + real_name + "}}"
            else:
                # BUGFIX: this branch previously appended a trailing " : "
                # (copy-pasted from the author-prefix branch above), which
                # inserted a stray colon mid-sentence wherever a user was
                # mentioned. The cloze branch correctly omits it.
                user_name_mentioned_text = user_name + "(" + real_name + ")"
        text = text.replace('<@' + user_id + '>', user_name_mentioned_text)
    return user_name_text + text.strip().replace('\n', '')
def get_replies(replies, channel_name):
    """Resolve a parent message's reply stubs into full message dicts.

    replies: list of {'user': ..., 'ts': ...} stubs from a parent message's
        'replies' field in the Slack export.
    channel_name: channel the parent message belongs to.

    Looks each stub up in the global df_copy (which run() must have
    populated first) by matching date, channel, timestamp and author.
    Returns the matching full message dicts, skipping authors listed in
    user_ids_to_remove and stubs whose day/channel has no loaded data.
    """
    replies_on_post = []
    for reply in replies:
        ts = reply['ts']
        # A reply can land on a later day than its parent, so the lookup
        # date is derived from the reply's own timestamp (export-local tz).
        date_ts = datetime.datetime.fromtimestamp(float(ts), tz=slack_time_zone).strftime("%Y-%m-%d")
        user = reply['user']
        if user in user_ids_to_remove:
            continue
        df = df_copy[(df_copy['Date'] == date_ts)]
        json_arr = df[(df['Channel'] == channel_name)]['Data']
        if (json_arr.empty):
            continue
        # NOTE(review): only the first matching Date/Channel row is scanned —
        # assumes one export file per channel per day; verify for merged exports.
        for message in json_arr.iloc[0]:
            if ('ts' in message and 'user' in message and message['ts'] == ts and message['user'] == user):
                replies_on_post.append(message)
    return replies_on_post
def add_blocks(daily_note_file_path, channel_name, data):
    """Append one channel's messages (and their thread replies) to a day file.

    Writes a top-level '[[channel]]' bullet, one indented bullet per
    message, and a deeper bullet per thread reply.
    """
    top_bullet = '    - '
    reply_bullet = '        - '
    with open(daily_note_file_path, 'a') as out:
        out.write('- ' + '[[' + channel_name + ']]' + '\n')
        for message in data:
            # Skip thread replies (rendered under their parent via
            # get_replies), subtyped events (joins etc.), and anything
            # without an author.
            if ('parent_user_id' in message) or ('subtype' in message) or ('user' not in message):
                continue
            author = message['user']
            if author in user_ids_to_remove:
                continue
            out.write(top_bullet + clean_up_text(message['text'], author) + '\n')
            if ('replies' in message):
                for reply in get_replies(message['replies'], channel_name):
                    out.write(reply_bullet + clean_up_text(reply['text'], reply['user']) + '\n')
def make_ordinal(n):
    """Convert an integer (or numeric string) to its ordinal, e.g. 3 -> '3rd'.

    Handles the 11th/12th/13th exceptions to the usual st/nd/rd rules.
    """
    n = int(n)
    if 11 <= n % 100 <= 13:
        suffix = 'th'
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(n % 10, 'th')
    return str(n) + suffix
def get_roam_date_page_from_date(date):
    """Turn 'YYYY-MM-DD' into a Roam-style page title, e.g. 'October 1st, 2021'."""
    date_time_obj = datetime.datetime.strptime(date, '%Y-%m-%d')
    full_month_name = date_time_obj.strftime("%B")
    year = date_time_obj.strftime("%Y")
    # .day is already an unpadded int — this replaces the fragile
    # strftime(" %d").replace(' 0', '').replace(' ', '') string surgery
    # that was used to strip the leading zero.
    day = make_ordinal(date_time_obj.day)
    return full_month_name + " " + day + ", " + year
def get_date_string(date):
    """Map a 'YYYY-MM-DD' date to the daily page's file stem.

    Returns a Roam ordinal title ('October 1st, 2021') when
    USE_CARDINAL_DATES is set, otherwise the underscore form '2021_10_01'.
    """
    if USE_CARDINAL_DATES:
        return get_roam_date_page_from_date(date)
    return date.replace("-", "_")
def add_notes_to_day_page(df):
    """groupby('Channel') callback: write one channel's section of a day page.

    df: sub-frame for a single Date/Channel pair; row 0 carries the channel
    name, the date string, and that day's parsed message list.
    """
    channel_name = df['Channel'].iloc[0]
    date = get_date_string(df['Date'].iloc[0])
    data = df['Data'].iloc[0]
    daily_note_file_path = SLACK_EXPORT_TO_PATH + '/' + date + '.md'
    # BUGFIX: the file used to be opened here ('a') AND again inside
    # add_blocks; the outer handle was never written to. add_blocks manages
    # the file itself, so the redundant open is gone.
    add_blocks(daily_note_file_path, channel_name, data)
def buildDailyPages(df):
    """groupby('Date') callback: create one markdown file for a single day.

    Truncates (or creates) the day's file, then appends each channel's
    section via add_notes_to_day_page.
    """
    date = get_date_string(df['Date'].iloc[0])
    print("Creating file: " + date + ".md")
    daily_note_file_path = SLACK_EXPORT_TO_PATH + '/' + date + '.md'
    # Open in 'w' mode solely to empty the file before the 'a' appends below.
    open(daily_note_file_path, 'w').close()
    df.groupby('Channel').apply(add_notes_to_day_page)
# Script entry point: convert the Slack export into per-day markdown pages.
if __name__ == "__main__":
    run()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment