karlrwjohnson · April 12, 2021 00:22
diff --git a/patch_pdf_bookmarks.py b/patch_pdf_bookmarks.py
 #!/usr/bin/python3
 ##
 ##  PDF Bookmark Patcher script
 ##
 ##  This script allows you to use a regular text editor to edit the bookmarks
 ##  in a PDF file. It is a wrapper around another tool called PDFtk-java.
 ##
 ##  Usage:
 ##  1. < replace somefile.pdf with the name of your file >
 ##  2. python3 ../patch_pdf_bookmarks.py somefile.pdf --export-text bookmarks.txt
 ##  3. < edit bookmarks.txt >
 ##  4. python3 ../patch_pdf_bookmarks.py somefile.pdf --import-text bookmarks.txt
 ##  5. < somefile.bookmarked.pdf is a copy of the file with updated bookmarks >
 ##
 ##  Requires:
 ##  - PDFtk-java (https://gitlab.com/pdftk-java/pdftk, https://linuxhint.com/install_pdftk_ubuntu/)
 ##    - Make sure that `pdftk` is present in your PATH
 ##  - Python 3
 ##

 import html
 import re
 import sys
 from argparse import ArgumentParser
 from bs4 import BeautifulSoup
 from subprocess import Popen, PIPE
 from logging import getLogger, DEBUG, basicConfig
 from textwrap import dedent
 from typing import List, NamedTuple, Union

 logger = getLogger()
 basicConfig(format='%(levelname)s - %(message)s')


 class PdfBookmark(NamedTuple):
    """One bookmark as a flat record (title, nesting level, target page)."""

    # Text of the bookmark
    title: str
    # Nesting depth; build_bookmark_tree treats level 1 as the top level
    level: int
    # Page the bookmark points to; build_pdf_bookmark_list stores 0 when none was given
    page_number: int


 class BookmarkTreeNode(NamedTuple):
    """A bookmark together with the bookmarks nested directly below it."""

    # Text of the bookmark
    title: str
    # Nesting depth of this node
    level: int
    # Page the bookmark points to
    page_number: int
    # Direct child nodes, in document order
    children: List['BookmarkTreeNode']


 class TextTreeNode(NamedTuple):
    """One non-comment line of the bookmark text file, plus the lines nested under it."""

    # The line without its leading whitespace
    content: str
    # The leading whitespace of the line, kept verbatim
    indent: str
    # Position of the line in the text file (used in messages)
    line_number: int
    # Lines indented below this one
    children: List['TextTreeNode']
        


 def main():
    """Parse the command line and run the export or the import step.

    When both --export-text and --import-text are given, the export takes
    precedence. When neither is given, the parser exits with a usage error
    instead of silently doing nothing.
    """
    from logging import INFO  # only DEBUG is imported at module level

    parser = ArgumentParser(description='Use pdftk to edit bookmarks on a PDF')
    parser.add_argument('pdf_file', help='PDF file to process')
    parser.add_argument('--export-text', help='Text file to write the extracted bookmarks to')
    parser.add_argument('--import-text', help='Text file of edited bookmarks to load into the PDF')
    parser.add_argument('--output-pdf', help='PDF file to output to')
    parser.add_argument('--verbose', help='Enable verbose logging', action='store_true')

    args = parser.parse_args()

    # INFO keeps the progress messages visible; --verbose adds the debug ones
    logger.setLevel(DEBUG if args.verbose else INFO)

    if args.export_text:
        export_text(args.pdf_file, args.export_text)
    elif args.import_text:
        import_text(args.pdf_file, args.import_text, args.output_pdf)
    else:
        parser.error('one of --export-text or --import-text is required')


 def export_text(pdf_filename: str, bookmark_filename: str):
    """Dump the bookmarks of ``pdf_filename`` into the text file ``bookmark_filename``.

    The text is generated completely before the output file is opened, so a
    failing pdftk run does not truncate an existing bookmark file.
    """
    data_lines: List[str] = dump_pdf_data(pdf_filename)
    bookmarks: List[PdfBookmark] = parse_bookmarks_from_pdf_data(data_lines)
    tree: List[BookmarkTreeNode] = build_bookmark_tree(bookmarks)
    text: str = export_bookmarks_text(tree, pdf_filename, bookmark_filename)

    # The data comes from pdftk's dump_data_utf8, so write UTF-8 regardless
    # of the platform's default encoding
    with open(bookmark_filename, 'w', encoding='utf-8') as outfile:
        outfile.write(text)


 def import_text(pdf_filename: str, bookmark_filename: str, output_filename: Union[str, None]):
    """Write a copy of ``pdf_filename`` whose bookmarks come from ``bookmark_filename``.

    When ``output_filename`` is empty, a name is derived from ``pdf_filename``
    by turning its trailing ``.pdf`` into ``.bookmarked.pdf``.
    """
    if not output_filename:
        # Only touch the extension at the very end of the name. If there is
        # none, append the suffix so the output never equals the input path.
        output_filename, replaced = re.subn(r'\.pdf\Z', '.bookmarked.pdf', pdf_filename, flags=re.IGNORECASE)
        if not replaced:
            output_filename = pdf_filename + '.bookmarked.pdf'
        logger.info('Automatically generating output filename %s', output_filename)

    # Read as UTF-8 to match what the export step and pdftk's *_utf8 commands use
    with open(bookmark_filename, encoding='utf-8') as infile:
        file_lines: List[str] = infile.read().split('\n')

    text_tree: List[TextTreeNode] = import_indented_text(file_lines)
    bookmarks: List[PdfBookmark] = build_pdf_bookmark_list(text_tree)
    logger.info(f'Loaded {len(bookmarks)} bookmarks with {sum(1 for x in bookmarks if x.page_number != 0)} page numbers')

    original_data_lines: List[str] = dump_pdf_data(pdf_filename)
    patched_data_lines: List[str] = patch_bookmarks_into_pdf_data(original_data_lines, bookmarks)
    update_pdf_data(pdf_filename, output_filename, patched_data_lines)



 def dump_pdf_data(filename: str) -> List[str]:
    """Run ``pdftk <filename> dump_data_utf8`` and return its output split into lines.

    Raises:
        RuntimeError: if pdftk exits with a non-zero status.
    """
    command = ['pdftk', filename, 'dump_data_utf8']
    logger.info(f'Running command: {command!r}')
    popen = Popen(command, stdout=PIPE)
    stdout_data, _ = popen.communicate()

    # Raise explicitly: an ``assert`` would be skipped under ``python -O``
    if popen.returncode != 0:
        raise RuntimeError(f'{command} failed (exit code {popen.returncode})')

    ret = stdout_data.decode('utf-8').split('\n')
    logger.debug(f'Command returned {len(ret)} lines')
    return ret


 def update_pdf_data(input_filename: str, output_filename: str, contents: List[str]) -> None:
    """Feed ``contents`` to ``pdftk ... update_info_utf8`` to produce ``output_filename``.

    The lines are joined with newlines, UTF-8 encoded and sent on pdftk's
    stdin. Whatever pdftk prints to stdout is echoed.

    Raises:
        RuntimeError: if pdftk exits with a non-zero status.
    """
    command = ['pdftk', input_filename, 'update_info_utf8', '-', 'output', output_filename, 'verbose']
    content_bytes = '\n'.join(contents).encode('utf-8')
    logger.info(f'Running command with {len(contents)} lines of input: {command!r}')
    popen = Popen(command, stdin=PIPE, stdout=PIPE)
    stdout_data, _ = popen.communicate(content_bytes)
    print(stdout_data.decode())

    # Raise explicitly: an ``assert`` would be skipped under ``python -O``
    if popen.returncode != 0:
        raise RuntimeError(f'{command} failed (exit code {popen.returncode})')


 def parse_bookmarks_from_pdf_data(lines: List[str]) -> List[PdfBookmark]:
    """Extract the bookmark records from the lines of a pdftk data dump.

    A record starts at a ``BookmarkBegin`` line and is described by the
    ``BookmarkTitle``, ``BookmarkLevel`` and ``BookmarkPageNumber`` lines that
    follow it. Malformed lines and incomplete records are logged as warnings
    and skipped; they never abort the parse.
    """
    ret: List[PdfBookmark] = []
    expected_keys = set(PdfBookmark._fields)

    # pdftk property name -> PdfBookmark field, for the integer-valued properties
    int_fields = {'BookmarkLevel': 'level', 'BookmarkPageNumber': 'page_number'}

    # Fields gathered so far for the record being read
    current_bookmark_fields = {}

    # Updated by the loop below and read by flush() for its warnings
    line_number = 0

    def flush():
        # Snapshot and reset first, so the early returns below cannot leak
        # state into the next record
        flushed_state = dict(current_bookmark_fields)
        current_bookmark_fields.clear()

        # The first BookmarkBegin flushes an empty record. This should not be an error.
        if not flushed_state:
            return

        # Do not create a bookmark if necessary keys are missing, but keep parsing
        missing_keys = expected_keys - flushed_state.keys()
        if missing_keys:
            logger.warning('Missing keys %s when flushing bookmark on line %s', missing_keys, line_number)
            return

        # Only the three PdfBookmark fields are ever stored above, so the
        # record can be passed through as-is
        ret.append(PdfBookmark(**flushed_state))

    for line_number, line in enumerate(lines, start=1):
        # rstrip tolerates a dump that was produced with CRLF line endings
        prop, separator, value = line.rstrip('\r').partition(': ')

        if prop == 'BookmarkBegin':
            flush()
            continue
        if prop != 'BookmarkTitle' and prop not in int_fields:
            continue  # not a bookmark property
        if not separator:
            logger.warning('Missing value on line %s', line_number)
            continue

        if prop == 'BookmarkTitle':
            current_bookmark_fields['title'] = html.unescape(value)
            continue
        try:
            current_bookmark_fields[int_fields[prop]] = int(value)
        except ValueError:
            logger.warning('Cannot parse %s as an integer on line %s', value, line_number)

    # The last record has no following BookmarkBegin to flush it
    flush()

    return ret


 def patch_bookmarks_into_pdf_data(pdftk_data: List[str], bookmarks: List[PdfBookmark]) -> List[str]:
    """Return the pdftk dump lines with their bookmark records replaced by ``bookmarks``.

    Every existing Bookmark* line is dropped. The new records are inserted
    where the first old ``BookmarkBegin`` line stood, or appended at the end
    when the dump contained none.
    """
    bookmark_prefixes = ('BookmarkBegin', 'BookmarkTitle', 'BookmarkLevel', 'BookmarkPageNumber')

    # Four dump lines per bookmark; titles are HTML-escaped
    replacement: List[str] = []
    for entry in bookmarks:
        replacement.append('BookmarkBegin')
        replacement.append(f'BookmarkTitle: {html.escape(entry.title)}')
        replacement.append(f'BookmarkLevel: {entry.level}')
        replacement.append(f'BookmarkPageNumber: {entry.page_number}')

    patched: List[str] = []
    inserted = False
    for row in pdftk_data:
        if not row.startswith(bookmark_prefixes):
            patched.append(row)
        elif not inserted and row.startswith('BookmarkBegin'):
            # First old record: this is where the new block goes
            patched.extend(replacement)
            inserted = True

    if not inserted:
        patched.extend(replacement)

    return patched


 def build_bookmark_tree(bookmarks: List[PdfBookmark]) -> List[BookmarkTreeNode]:
    """Nest a flat bookmark list into a tree according to each bookmark's ``level``.

    Returns the list of level-1 nodes. When a bookmark sits more than one
    level below its predecessor (e.g. level 3 followed by level 5),
    ``<placeholder>`` nodes with page number 0 are inserted for the skipped
    levels, each carrying the level it actually occupies.

    Raises:
        RuntimeError: if a bookmark has a level below 1.
    """
    logger.debug('converting list of %s bookmarks into a tree based on their "level" properties', len(bookmarks))

    # The value that we'll return -- the bookmark nodes with level=1
    root_node_list: List[BookmarkTreeNode] = []

    # Stack of child lists rather than of nodes: entry 0 is the root list,
    # entry i holds the children of the most recently added node of level i.
    # Its length is therefore the level at which the next node gets added.
    ancestor_child_lists: List[List[BookmarkTreeNode]] = [root_node_list]

    def push_node(node: BookmarkTreeNode):
        # Attach the node to its parent, then make it the parent for
        # whatever gets pushed next
        ancestor_child_lists[-1].append(node)
        ancestor_child_lists.append(node.children)

    for bookmark in bookmarks:
        if bookmark.level < 1:
            raise RuntimeError("Bookmark level should not go below 1")

        # Peel the stack back until its top is the list that holds nodes
        # of this bookmark's level
        del ancestor_child_lists[bookmark.level:]

        # If the bookmark jumped down too far, fill the gap with placeholders
        while len(ancestor_child_lists) < bookmark.level:
            push_node(BookmarkTreeNode(
                title="<placeholder>",
                level=len(ancestor_child_lists),
                page_number=0,
                children=[]
            ))

        push_node(BookmarkTreeNode(
            title=bookmark.title,
            level=bookmark.level,
            page_number=bookmark.page_number,
            children=[]
        ))

    logger.debug('finished generating bookmark tree')
    return root_node_list


 def export_bookmarks_text(bookmark_tree: List[BookmarkTreeNode], pdf_filename: str, bookmark_filename: str) -> str:
    """Render the bookmark tree as the editable text format.

    The result starts with a commented instruction header (which mentions
    ``pdf_filename`` and ``bookmark_filename``), followed by one
    ``<page number>. <title>`` line per bookmark. Children are indented with
    one extra tab per nesting level.
    """
    logger.debug('generating text file from tree')

    instructions = dedent(f"""\
        # Bookmarks extracted from {pdf_filename}
        #
        # Instructions:
        #
        # Using a text editor, update the following list of bookmarks.
        # Each bookmark should appear on its own line, and it should follow the format
        #   <page number>. <title>
        # To make some bookmarks appear as children of others, use spaces or tabs to indent.
        #
        # Blank lines and lines starting with a "#" (like this one) are comments and will not be interpreted as a bookmark
        #
        # Then, import them back into the PDF file using the command:
        #   python {sys.argv[0]} {pdf_filename} --import-text {bookmark_filename}
        #
        # E.g.:
        #
        # 1. Introduction
        # 3. Episode IV: A New Hope
        #    4. Scene 1: Vader captures Leia
        #    10. Scene 2: Luke on Tattoine
        # 105. Episode V: The Empire Strikes Back
        # 192. Episode VI: Return of the Jedi
    """)

    lines = [instructions, '']

    def print_tree_node(nodes, indent=''):
        # Depth-first, so each bookmark is directly followed by its children
        for current in nodes:
            lines.append(f'{indent}{current.page_number}. {current.title}')
            print_tree_node(current.children, indent + '\t')
    print_tree_node(bookmark_tree)

    ret = '\n'.join(lines)
    logger.debug('finished generating text file')
    return ret


 def import_indented_text(file_lines: List[str]) -> List[TextTreeNode]:
    """Turn the lines of an indented text file into a tree of TextTreeNode.

    Blank lines and lines whose first non-blank character is ``#`` are
    skipped. A line becomes a child of the closest preceding line whose
    indentation is a strict prefix of its own. Line numbers stored in the
    nodes and used in error messages are 1-based.

    Raises:
        RuntimeError: if a line's indentation does not start with its
            parent's indentation (e.g. tabs and spaces were mixed).
    """
    # The value that we'll return -- the nodes that have no parent
    root_node_list: List[TextTreeNode] = []

    # Chain of nodes the next line could still be nested under, outermost first
    open_nodes: List[TextTreeNode] = []

    for line_number, line in enumerate(file_lines, start=1):

        # Skip blank lines and comments
        if re.match(r'^\s*(?:#|$)', line):
            continue

        indent, content = re.match(r'^(\s*)(.+)$', line).groups()

        # If an open node's indent starts with the current line's indent,
        # the line is a peer of that node or of one of its ancestors.
        # Close nodes until one with a smaller indent is on top.
        while open_nodes and open_nodes[-1].indent.startswith(indent):
            open_nodes.pop()

        # The child's indent must extend the parent's indent. If it does
        # not, tabs and spaces were mixed and the nesting is ambiguous.
        if open_nodes and not indent.startswith(open_nodes[-1].indent):
            raise RuntimeError(f"Inconsistent indentation whitespace at line {line_number}. Make sure you're using either tabs or spaces, not both!")

        node = TextTreeNode(
            content=content,
            indent=indent,
            line_number=line_number,
            children=[]
        )
        siblings = open_nodes[-1].children if open_nodes else root_node_list
        siblings.append(node)
        open_nodes.append(node)

    return root_node_list


 def build_pdf_bookmark_list(text_tree: List[TextTreeNode], level=1) -> List[PdfBookmark]:
    """Flatten the text tree depth-first into a list of PdfBookmark.

    Each node's content is parsed as ``<page number>. <title>``; the page
    number and the dot are optional. Nodes without a page number are logged
    and get page number 0. ``level`` is the level assigned to the nodes of
    ``text_tree``; their children get ``level + 1`` and so on.
    """
    ret: List[PdfBookmark] = []

    for line in text_tree:
        page_number, title = re.match(r'^\s*(\d+)?(?:\.?)\s*(.+)$', line.content).groups()
        if page_number is None:
            logger.warning('No page number for bookmark named %s on line %s', title, line.line_number)
            page_number = 0

        ret.append(PdfBookmark(title=title, level=level, page_number=int(page_number)))

        # Children follow their parent directly, one level deeper
        ret.extend(build_pdf_bookmark_list(line.children, level=level + 1))

    return ret


 if __name__ == '__main__':
    # sys.exit rather than the bare `exit`, which is a helper installed by
    # the site module and is not guaranteed to exist (e.g. under python -S)
    sys.exit(main())
	#!/usr/bin/python3
	##
	## PDF Bookmark Patcher script
	##
	## This script allows you to use a regular text editor to edit the bookmarks
	## in a PDF file. It is a wrapper around another tool called PDFtk-java.
	##
	## Usage:
	## 1. < replace somefile.pdf with the name of your file >
	## 2. python3 ../patch_pdf_bookmarks.py somefile.pdf --export-text bookmarks.txt
	## 3. < edit bookmarks.txt >
	## 4. python3 ../patch_pdf_bookmarks.py somefile.pdf --import-text bookmarks.txt
	## 5. < somefile.bookmarked.pdf is a copy of the file with updated bookmarks >
	##
	## Requires:
	## - PDFtk-java (https://gitlab.com/pdftk-java/pdftk, https://linuxhint.com/install_pdftk_ubuntu/)
	## - Make sure that `pdftk` is present in your PATH
	## - Python 3
	##

	import html
	import re
	import sys
	from argparse import ArgumentParser
	from bs4 import BeautifulSoup
	from subprocess import Popen, PIPE
	from logging import getLogger, DEBUG, basicConfig
	from textwrap import dedent
	from typing import List, NamedTuple, Union

	logger = getLogger()
	basicConfig(format='%(levelname)s - %(message)s')


	class PdfBookmark(NamedTuple):
	    """One bookmark as a flat record (title, nesting level, target page)."""

	    # Text of the bookmark
	    title: str
	    # Nesting depth; build_bookmark_tree treats level 1 as the top level
	    level: int
	    # Page the bookmark points to; build_pdf_bookmark_list stores 0 when none was given
	    page_number: int


	class BookmarkTreeNode(NamedTuple):
	    """A bookmark together with the bookmarks nested directly below it."""

	    # Text of the bookmark
	    title: str
	    # Nesting depth of this node
	    level: int
	    # Page the bookmark points to
	    page_number: int
	    # Direct child nodes, in document order
	    children: List['BookmarkTreeNode']


	class TextTreeNode(NamedTuple):
	    """One non-comment line of the bookmark text file, plus the lines nested under it."""

	    # The line without its leading whitespace
	    content: str
	    # The leading whitespace of the line, kept verbatim
	    indent: str
	    # Position of the line in the text file (used in messages)
	    line_number: int
	    # Lines indented below this one
	    children: List['TextTreeNode']



	def main():
	    """Parse the command line and run the export or the import step.

	    When both --export-text and --import-text are given, the export takes
	    precedence. When neither is given, the parser exits with a usage error
	    instead of silently doing nothing.
	    """
	    from logging import INFO  # only DEBUG is imported at module level

	    parser = ArgumentParser(description='Use pdftk to edit bookmarks on a PDF')
	    parser.add_argument('pdf_file', help='PDF file to process')
	    parser.add_argument('--export-text', help='Text file to write the extracted bookmarks to')
	    parser.add_argument('--import-text', help='Text file of edited bookmarks to load into the PDF')
	    parser.add_argument('--output-pdf', help='PDF file to output to')
	    parser.add_argument('--verbose', help='Enable verbose logging', action='store_true')

	    args = parser.parse_args()

	    # INFO keeps the progress messages visible; --verbose adds the debug ones
	    logger.setLevel(DEBUG if args.verbose else INFO)

	    if args.export_text:
	        export_text(args.pdf_file, args.export_text)
	    elif args.import_text:
	        import_text(args.pdf_file, args.import_text, args.output_pdf)
	    else:
	        parser.error('one of --export-text or --import-text is required')


	def export_text(pdf_filename: str, bookmark_filename: str):
	    """Dump the bookmarks of ``pdf_filename`` into the text file ``bookmark_filename``.

	    The text is generated completely before the output file is opened, so a
	    failing pdftk run does not truncate an existing bookmark file.
	    """
	    data_lines: List[str] = dump_pdf_data(pdf_filename)
	    bookmarks: List[PdfBookmark] = parse_bookmarks_from_pdf_data(data_lines)
	    tree: List[BookmarkTreeNode] = build_bookmark_tree(bookmarks)
	    text: str = export_bookmarks_text(tree, pdf_filename, bookmark_filename)

	    # The data comes from pdftk's dump_data_utf8, so write UTF-8 regardless
	    # of the platform's default encoding
	    with open(bookmark_filename, 'w', encoding='utf-8') as outfile:
	        outfile.write(text)


	def import_text(pdf_filename: str, bookmark_filename: str, output_filename: Union[str, None]):
	    """Write a copy of ``pdf_filename`` whose bookmarks come from ``bookmark_filename``.

	    When ``output_filename`` is empty, a name is derived from ``pdf_filename``
	    by turning its trailing ``.pdf`` into ``.bookmarked.pdf``.
	    """
	    if not output_filename:
	        # Only touch the extension at the very end of the name. If there is
	        # none, append the suffix so the output never equals the input path.
	        output_filename, replaced = re.subn(r'\.pdf\Z', '.bookmarked.pdf', pdf_filename, flags=re.IGNORECASE)
	        if not replaced:
	            output_filename = pdf_filename + '.bookmarked.pdf'
	        logger.info('Automatically generating output filename %s', output_filename)

	    # Read as UTF-8 to match what the export step and pdftk's *_utf8 commands use
	    with open(bookmark_filename, encoding='utf-8') as infile:
	        file_lines: List[str] = infile.read().split('\n')

	    text_tree: List[TextTreeNode] = import_indented_text(file_lines)
	    bookmarks: List[PdfBookmark] = build_pdf_bookmark_list(text_tree)
	    logger.info(f'Loaded {len(bookmarks)} bookmarks with {sum(1 for x in bookmarks if x.page_number != 0)} page numbers')

	    original_data_lines: List[str] = dump_pdf_data(pdf_filename)
	    patched_data_lines: List[str] = patch_bookmarks_into_pdf_data(original_data_lines, bookmarks)
	    update_pdf_data(pdf_filename, output_filename, patched_data_lines)



	def dump_pdf_data(filename: str) -> List[str]:
	    """Run ``pdftk <filename> dump_data_utf8`` and return its output split into lines.

	    Raises:
	        RuntimeError: if pdftk exits with a non-zero status.
	    """
	    command = ['pdftk', filename, 'dump_data_utf8']
	    logger.info(f'Running command: {command!r}')
	    popen = Popen(command, stdout=PIPE)
	    stdout_data, _ = popen.communicate()

	    # Raise explicitly: an ``assert`` would be skipped under ``python -O``
	    if popen.returncode != 0:
	        raise RuntimeError(f'{command} failed (exit code {popen.returncode})')

	    ret = stdout_data.decode('utf-8').split('\n')
	    logger.debug(f'Command returned {len(ret)} lines')
	    return ret


	def update_pdf_data(input_filename: str, output_filename: str, contents: List[str]) -> None:
	    """Feed ``contents`` to ``pdftk ... update_info_utf8`` to produce ``output_filename``.

	    The lines are joined with newlines, UTF-8 encoded and sent on pdftk's
	    stdin. Whatever pdftk prints to stdout is echoed.

	    Raises:
	        RuntimeError: if pdftk exits with a non-zero status.
	    """
	    command = ['pdftk', input_filename, 'update_info_utf8', '-', 'output', output_filename, 'verbose']
	    content_bytes = '\n'.join(contents).encode('utf-8')
	    logger.info(f'Running command with {len(contents)} lines of input: {command!r}')
	    popen = Popen(command, stdin=PIPE, stdout=PIPE)
	    stdout_data, _ = popen.communicate(content_bytes)
	    print(stdout_data.decode())

	    # Raise explicitly: an ``assert`` would be skipped under ``python -O``
	    if popen.returncode != 0:
	        raise RuntimeError(f'{command} failed (exit code {popen.returncode})')


	def parse_bookmarks_from_pdf_data(lines: List[str]) -> List[PdfBookmark]:
	    """Extract the bookmark records from the lines of a pdftk data dump.

	    A record starts at a ``BookmarkBegin`` line and is described by the
	    ``BookmarkTitle``, ``BookmarkLevel`` and ``BookmarkPageNumber`` lines that
	    follow it. Malformed lines and incomplete records are logged as warnings
	    and skipped; they never abort the parse.
	    """
	    ret: List[PdfBookmark] = []
	    expected_keys = set(PdfBookmark._fields)

	    # pdftk property name -> PdfBookmark field, for the integer-valued properties
	    int_fields = {'BookmarkLevel': 'level', 'BookmarkPageNumber': 'page_number'}

	    # Fields gathered so far for the record being read
	    current_bookmark_fields = {}

	    # Updated by the loop below and read by flush() for its warnings
	    line_number = 0

	    def flush():
	        # Snapshot and reset first, so the early returns below cannot leak
	        # state into the next record
	        flushed_state = dict(current_bookmark_fields)
	        current_bookmark_fields.clear()

	        # The first BookmarkBegin flushes an empty record. This should not be an error.
	        if not flushed_state:
	            return

	        # Do not create a bookmark if necessary keys are missing, but keep parsing
	        missing_keys = expected_keys - flushed_state.keys()
	        if missing_keys:
	            logger.warning('Missing keys %s when flushing bookmark on line %s', missing_keys, line_number)
	            return

	        # Only the three PdfBookmark fields are ever stored above, so the
	        # record can be passed through as-is
	        ret.append(PdfBookmark(**flushed_state))

	    for line_number, line in enumerate(lines, start=1):
	        # rstrip tolerates a dump that was produced with CRLF line endings
	        prop, separator, value = line.rstrip('\r').partition(': ')

	        if prop == 'BookmarkBegin':
	            flush()
	            continue
	        if prop != 'BookmarkTitle' and prop not in int_fields:
	            continue  # not a bookmark property
	        if not separator:
	            logger.warning('Missing value on line %s', line_number)
	            continue

	        if prop == 'BookmarkTitle':
	            current_bookmark_fields['title'] = html.unescape(value)
	            continue
	        try:
	            current_bookmark_fields[int_fields[prop]] = int(value)
	        except ValueError:
	            logger.warning('Cannot parse %s as an integer on line %s', value, line_number)

	    # The last record has no following BookmarkBegin to flush it
	    flush()

	    return ret


	def patch_bookmarks_into_pdf_data(pdftk_data: List[str], bookmarks: List[PdfBookmark]) -> List[str]:
	    """Return the pdftk dump lines with their bookmark records replaced by ``bookmarks``.

	    Every existing Bookmark* line is dropped. The new records are inserted
	    where the first old ``BookmarkBegin`` line stood, or appended at the end
	    when the dump contained none.
	    """
	    bookmark_prefixes = ('BookmarkBegin', 'BookmarkTitle', 'BookmarkLevel', 'BookmarkPageNumber')

	    # Four dump lines per bookmark; titles are HTML-escaped
	    replacement: List[str] = []
	    for entry in bookmarks:
	        replacement.append('BookmarkBegin')
	        replacement.append(f'BookmarkTitle: {html.escape(entry.title)}')
	        replacement.append(f'BookmarkLevel: {entry.level}')
	        replacement.append(f'BookmarkPageNumber: {entry.page_number}')

	    patched: List[str] = []
	    inserted = False
	    for row in pdftk_data:
	        if not row.startswith(bookmark_prefixes):
	            patched.append(row)
	        elif not inserted and row.startswith('BookmarkBegin'):
	            # First old record: this is where the new block goes
	            patched.extend(replacement)
	            inserted = True

	    if not inserted:
	        patched.extend(replacement)

	    return patched


	def build_bookmark_tree(bookmarks: List[PdfBookmark]) -> List[BookmarkTreeNode]:
	    """Nest a flat bookmark list into a tree according to each bookmark's ``level``.

	    Returns the list of level-1 nodes. When a bookmark sits more than one
	    level below its predecessor (e.g. level 3 followed by level 5),
	    ``<placeholder>`` nodes with page number 0 are inserted for the skipped
	    levels, each carrying the level it actually occupies.

	    Raises:
	        RuntimeError: if a bookmark has a level below 1.
	    """
	    logger.debug('converting list of %s bookmarks into a tree based on their "level" properties', len(bookmarks))

	    # The value that we'll return -- the bookmark nodes with level=1
	    root_node_list: List[BookmarkTreeNode] = []

	    # Stack of child lists rather than of nodes: entry 0 is the root list,
	    # entry i holds the children of the most recently added node of level i.
	    # Its length is therefore the level at which the next node gets added.
	    ancestor_child_lists: List[List[BookmarkTreeNode]] = [root_node_list]

	    def push_node(node: BookmarkTreeNode):
	        # Attach the node to its parent, then make it the parent for
	        # whatever gets pushed next
	        ancestor_child_lists[-1].append(node)
	        ancestor_child_lists.append(node.children)

	    for bookmark in bookmarks:
	        if bookmark.level < 1:
	            raise RuntimeError("Bookmark level should not go below 1")

	        # Peel the stack back until its top is the list that holds nodes
	        # of this bookmark's level
	        del ancestor_child_lists[bookmark.level:]

	        # If the bookmark jumped down too far, fill the gap with placeholders
	        while len(ancestor_child_lists) < bookmark.level:
	            push_node(BookmarkTreeNode(
	                title="<placeholder>",
	                level=len(ancestor_child_lists),
	                page_number=0,
	                children=[]
	            ))

	        push_node(BookmarkTreeNode(
	            title=bookmark.title,
	            level=bookmark.level,
	            page_number=bookmark.page_number,
	            children=[]
	        ))

	    logger.debug('finished generating bookmark tree')
	    return root_node_list


	def export_bookmarks_text(bookmark_tree: List[BookmarkTreeNode], pdf_filename: str, bookmark_filename: str) -> str:
	    """Render the bookmark tree as the editable text format.

	    The result starts with a commented instruction header (which mentions
	    ``pdf_filename`` and ``bookmark_filename``), followed by one
	    ``<page number>. <title>`` line per bookmark. Children are indented with
	    one extra tab per nesting level.
	    """
	    logger.debug('generating text file from tree')

	    instructions = dedent(f"""\
	        # Bookmarks extracted from {pdf_filename}
	        #
	        # Instructions:
	        #
	        # Using a text editor, update the following list of bookmarks.
	        # Each bookmark should appear on its own line, and it should follow the format
	        #   <page number>. <title>
	        # To make some bookmarks appear as children of others, use spaces or tabs to indent.
	        #
	        # Blank lines and lines starting with a "#" (like this one) are comments and will not be interpreted as a bookmark
	        #
	        # Then, import them back into the PDF file using the command:
	        #   python {sys.argv[0]} {pdf_filename} --import-text {bookmark_filename}
	        #
	        # E.g.:
	        #
	        # 1. Introduction
	        # 3. Episode IV: A New Hope
	        #    4. Scene 1: Vader captures Leia
	        #    10. Scene 2: Luke on Tattoine
	        # 105. Episode V: The Empire Strikes Back
	        # 192. Episode VI: Return of the Jedi
	    """)

	    lines = [instructions, '']

	    def print_tree_node(nodes, indent=''):
	        # Depth-first, so each bookmark is directly followed by its children
	        for current in nodes:
	            lines.append(f'{indent}{current.page_number}. {current.title}')
	            print_tree_node(current.children, indent + '\t')
	    print_tree_node(bookmark_tree)

	    ret = '\n'.join(lines)
	    logger.debug('finished generating text file')
	    return ret


	def import_indented_text(file_lines: List[str]) -> List[TextTreeNode]:
	    """Turn the lines of an indented text file into a tree of TextTreeNode.

	    Blank lines and lines whose first non-blank character is ``#`` are
	    skipped. A line becomes a child of the closest preceding line whose
	    indentation is a strict prefix of its own. Line numbers stored in the
	    nodes and used in error messages are 1-based.

	    Raises:
	        RuntimeError: if a line's indentation does not start with its
	            parent's indentation (e.g. tabs and spaces were mixed).
	    """
	    # The value that we'll return -- the nodes that have no parent
	    root_node_list: List[TextTreeNode] = []

	    # Chain of nodes the next line could still be nested under, outermost first
	    open_nodes: List[TextTreeNode] = []

	    for line_number, line in enumerate(file_lines, start=1):

	        # Skip blank lines and comments
	        if re.match(r'^\s*(?:#|$)', line):
	            continue

	        indent, content = re.match(r'^(\s*)(.+)$', line).groups()

	        # If an open node's indent starts with the current line's indent,
	        # the line is a peer of that node or of one of its ancestors.
	        # Close nodes until one with a smaller indent is on top.
	        while open_nodes and open_nodes[-1].indent.startswith(indent):
	            open_nodes.pop()

	        # The child's indent must extend the parent's indent. If it does
	        # not, tabs and spaces were mixed and the nesting is ambiguous.
	        if open_nodes and not indent.startswith(open_nodes[-1].indent):
	            raise RuntimeError(f"Inconsistent indentation whitespace at line {line_number}. Make sure you're using either tabs or spaces, not both!")

	        node = TextTreeNode(
	            content=content,
	            indent=indent,
	            line_number=line_number,
	            children=[]
	        )
	        siblings = open_nodes[-1].children if open_nodes else root_node_list
	        siblings.append(node)
	        open_nodes.append(node)

	    return root_node_list


	def build_pdf_bookmark_list(text_tree: List[TextTreeNode], level=1) -> List[PdfBookmark]:
	    """Flatten the text tree depth-first into a list of PdfBookmark.

	    Each node's content is parsed as ``<page number>. <title>``; the page
	    number and the dot are optional. Nodes without a page number are logged
	    and get page number 0. ``level`` is the level assigned to the nodes of
	    ``text_tree``; their children get ``level + 1`` and so on.
	    """
	    ret: List[PdfBookmark] = []

	    for line in text_tree:
	        page_number, title = re.match(r'^\s*(\d+)?(?:\.?)\s*(.+)$', line.content).groups()
	        if page_number is None:
	            logger.warning('No page number for bookmark named %s on line %s', title, line.line_number)
	            page_number = 0

	        ret.append(PdfBookmark(title=title, level=level, page_number=int(page_number)))

	        # Children follow their parent directly, one level deeper
	        ret.extend(build_pdf_bookmark_list(line.children, level=level + 1))

	    return ret


	if __name__ == '__main__':
	    # sys.exit rather than the bare `exit`, which is a helper installed by
	    # the site module and is not guaranteed to exist (e.g. under python -S)
	    sys.exit(main())