Last active
September 24, 2023 15:12
-
-
Save reagle/7418f54fb6e40fe8d925e1c3f5325076 to your computer and use it in GitHub Desktop.
Convert Zim exported markdown to Obsidian by removing first heading and converting setext to atx headers
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
"""Convert Zim exported markdown to Obsidian by: | |
1. removing first heading | |
2. converting setext to atx headers | |
""" | |
# https://gist.github.com/reagle/7418f54fb6e40fe8d925e1c3f5325076 | |
import re | |
from pathlib import Path | |
def convert_setext_to_atx(match):
    """Build the ATX replacement for one matched setext heading.

    ``match`` is a regex match object whose group 1 holds the heading text
    and whose group 2 holds an underline character. An ``=`` underline gives
    a level-one heading (``#``); any other underline gives level two (``##``).
    The converted heading is also echoed to stdout, prefixed with a space.
    """
    title = match.group(1)
    hashes = "#" if match.group(2) == "=" else "##"
    atx_heading = f"{hashes} {title}"
    print(f" {atx_heading}")
    return atx_heading
def remove_first_line_if_heading(content):
    """Drop the first line of ``content`` when it is a level-one ATX heading.

    A line counts as such a heading when it starts with ``"# "``. Everything
    after that line is returned untouched, including the original line
    endings and any trailing newline. If the first line is not a level-one
    heading (or ``content`` is empty), ``content`` is returned unchanged.
    """
    # keepends=True keeps each line's own terminator, so re-joining with ""
    # reproduces the remainder exactly instead of losing the final newline.
    lines = content.splitlines(keepends=True)
    if lines and lines[0].startswith("# "):
        return "".join(lines[1:])
    return content
def process_file(file_path):
    """Rewrite one Markdown file in place for use in Obsidian.

    The file at ``file_path`` (a ``pathlib.Path``) is read as UTF-8, its
    leading level-one heading is removed via ``remove_first_line_if_heading``,
    every setext heading (a title line underlined with ``=`` or ``-``) is
    converted to ATX form via ``convert_setext_to_atx``, and the result is
    written back to the same path as UTF-8.
    """
    with file_path.open("r", encoding="utf-8") as file:
        content = file.read()
    content = remove_first_line_if_heading(content)
    # Group 1: the title line, which must contain a non-blank character so
    # that a blank line followed by a ``---`` horizontal rule is not turned
    # into an empty "## " heading.
    # Group 2: the first underline character; the backreference ``\2*``
    # requires the rest of the underline to repeat that same character, so
    # mixed runs such as ``=-=-`` are not treated as an underline.
    setext_header_pattern = r"^(.*\S.*)\n([=-])\2*(?=\n|$)"
    content = re.sub(
        setext_header_pattern, convert_setext_to_atx, content, flags=re.MULTILINE
    )
    with file_path.open("w", encoding="utf-8") as file:
        file.write(content)
def main():
    """Convert every ``*.md`` file found under the current working directory.

    The search is recursive. Each matching path is handed to
    ``process_file`` and then reported on stdout.
    """
    for markdown_path in Path.cwd().rglob("*.md"):
        process_file(markdown_path)
        print(f"Processed {markdown_path}")
# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment