jrwrigh · July 10, 2025 16:06 · jrwrigh · Dec 11, 2024 · jrwrigh · Dec 11, 2024
diff --git a/README.md b/README.md
diff --git a/pdfmerger.py b/pdfmerger.py
 #! /usr/bin/env python
 # Original author Nicholas Kim, modified by Yan Pashkovsky
 # New license - GPL v3
 import sys
 import time
 from pathlib import Path
 # from PyPDF2 import PdfReader, PdfWriter
 # try:
 #     from PyPDF2.utils import PdfReadError
 # except ImportError:
 #     from PyPDF2._reader import PdfReadError

 from pypdf import PdfReader, PdfWriter
 try:
    from pypdf.utils import PdfReadError
 except ImportError:
    from pypdf._reader import PdfReadError

 def eprint(*args, **kwargs):
     """Write a message to standard error.

     Positional and keyword arguments are forwarded unchanged to the
     built-in ``print``; only the destination stream is fixed to
     ``sys.stderr``.

     Taken from https://stackoverflow.com/a/14981125/7564988
     """
     error_stream = sys.stderr
     print(*args, file=error_stream, **kwargs)

 def get_cmdline_arguments():
     """Parse the command line and return the options and input files.

     Reads ``sys.argv`` through ``optparse``. When fewer than two input
     files are given, the help text is printed and the process exits with
     status 1.

     Returns:
         A ``(options, args)`` tuple. ``options`` carries
         ``output_filename`` (output name without the ``.pdf`` extension),
         ``bookmark_file`` (path of the bookmarks file, or ``None``) and
         ``verbose`` (``True`` by default; ``-q`` clears it and ``-v`` sets
         it again). ``args`` is the list of positional arguments, i.e. the
         input file names in command-line order.
     """
     from optparse import OptionParser

     usage_string = "%prog [-v] [-o output_name] [-b bookmarks_file] file1, file2 [, ...]"

     parser = OptionParser(usage_string)
     parser.add_option(
         "-o", "--output",
         dest="output_filename",
         # Computed at parse time, so the default name is a fresh timestamp.
         default=time.strftime("output_%Y%m%d_%H%M%S"),
         help="Specify output filename (exclude .pdf extension); default is current date/time stamp"
     )
     parser.add_option(
         "-b", "--bookmarks",
         dest="bookmark_file",
         default=None,
         help="Specify the bookmark names for each file. The file should be new-line delimited and the number of lines must match the number of input files. If not given, the name of each file will be used as the bookmark name."
     )
     # -v and -q share one destination, so whichever comes last wins.
     parser.add_option("-v", "--verbose",
                       action="store_true", dest="verbose", default=True,
                       help="Print detailed output (undoes quiet)")
     parser.add_option("-q", "--quiet",
                       action="store_false", dest="verbose", default=True,
                       help="Do not print detailed output (undoes verbose)")

     options, args = parser.parse_args()
     # Merging needs at least two inputs.
     if len(args) < 2:
         parser.print_help()
         sys.exit(1)
     return options, args

 def main():
     """Merge the PDF files named on the command line into one bookmarked PDF.

     The pages of every input file are appended in command-line order, and
     an outline entry (bookmark) is added that points at the first page of
     each input. Bookmark titles are read from the ``-b`` file (one title
     per line) or, when that option is not given, are the input file names.

     Exits with status 1, after printing a message to stderr, when the
     bookmark file does not exist, the number of bookmarks does not match
     the number of inputs, or an input file cannot be opened or is rejected
     by ``PdfReader``.
     """
     # ExitStack owns every input handle so they are closed on all exit paths.
     from contextlib import ExitStack

     options, filenames = get_cmdline_arguments()
     verboseprint = print if options.verbose else lambda *a, **k: None
     output_pdf_name = options.output_filename + ".pdf"

     verboseprint(f"Output filename: {output_pdf_name}")
     verboseprint("Input filenames:")
     for name in filenames:
         verboseprint(f"\t{name}")

     # gather bookmark names
     if options.bookmark_file:
         bookmark_path = Path(options.bookmark_file)
         # Explicit checks rather than ``assert``: assertions are stripped
         # under ``python -O`` and must not guard user input.
         if not bookmark_path.exists():
             eprint(f"Bookmark file '{options.bookmark_file}' does not exist.")
             sys.exit(1)
         with bookmark_path.open() as file:
             bookmarks = file.read().splitlines()

         if len(bookmarks) != len(filenames):
             eprint(f"Number of bookmarks in '{bookmark_path}' ({len(bookmarks)}) does not match the number of files ({len(filenames)})")
             sys.exit(1)

         verboseprint("Bookmark Names:")
         for title in bookmarks:
             verboseprint(f"\t{title}")
     else:
         bookmarks = list(filenames)
         verboseprint("Bookmark Names: Same as filenames")

     with ExitStack() as input_handles:
         # get PDF files
         files_to_merge = []
         for name in filenames:
             try:
                 handle = input_handles.enter_context(open(name, "rb"))
                 reader = PdfReader(handle)
             except PdfReadError:
                 eprint("%s is not a valid PDF file." % name)
                 sys.exit(1)
             except IOError:
                 eprint("%s could not be found." % name)
                 sys.exit(1)
             else:
                 files_to_merge.append(reader)

         # merge page by page
         output_pdf_stream = PdfWriter()
         page_index = 0  # index of the next page in the merged output
         for name, title, reader in zip(filenames, bookmarks, files_to_merge):
             verboseprint(f"Adding {name} to output")
             for i, page in enumerate(reader.pages):
                 output_pdf_stream.add_page(page)
                 if i == 0:
                     # Bookmark the first page contributed by this input.
                     output_pdf_stream.add_outline_item(str(title), page_index)
                 page_index += 1

         # create output pdf file; the inputs stay open until the write is done
         verboseprint("Writing output file...")
         with open(output_pdf_name, "wb") as output_pdf_file:
             output_pdf_stream.write(output_pdf_file)

     print("%s successfully created." % output_pdf_name)


 # Run the merge only when this file is executed as a script, not on import.
 if __name__ == "__main__":
    main()
	#! /usr/bin/env python
	# Original author Nicholas Kim, modified by Yan Pashkovsky
	# New license - GPL v3
	import sys
	import time
	from pathlib import Path
	# from PyPDF2 import PdfReader, PdfWriter
	# try:
	# from PyPDF2.utils import PdfReadError
	# except ImportError:
	# from PyPDF2._reader import PdfReadError

	from pypdf import PdfReader, PdfWriter
	try:
	from pypdf.utils import PdfReadError
	except ImportError:
	from pypdf._reader import PdfReadError

	def eprint(*args, **kwargs):
	    """Print to stderr.

	    Positional and keyword arguments are forwarded unchanged to the
	    built-in ``print``; only the destination stream is fixed to
	    ``sys.stderr``.

	    Taken from https://stackoverflow.com/a/14981125/7564988
	    """
	    print(*args, file=sys.stderr, **kwargs)

	def get_cmdline_arguments():
	    """Parse the command line and return the options and input files.

	    Reads ``sys.argv`` through ``optparse``. When fewer than two input
	    files are given, the help text is printed and the process exits with
	    status 1.

	    Returns:
	        A ``(options, args)`` tuple. ``options`` carries
	        ``output_filename`` (output name without the ``.pdf`` extension),
	        ``bookmark_file`` (path of the bookmarks file, or ``None``) and
	        ``verbose`` (``True`` by default; ``-q`` clears it and ``-v`` sets
	        it again). ``args`` is the list of positional arguments, i.e. the
	        input file names in command-line order.
	    """
	    from optparse import OptionParser

	    usage_string = "%prog [-v] [-o output_name] [-b bookmarks_file] file1, file2 [, ...]"

	    parser = OptionParser(usage_string)
	    parser.add_option(
	        "-o", "--output",
	        dest="output_filename",
	        # Computed at parse time, so the default name is a fresh timestamp.
	        default=time.strftime("output_%Y%m%d_%H%M%S"),
	        help="Specify output filename (exclude .pdf extension); default is current date/time stamp"
	    )
	    parser.add_option(
	        "-b", "--bookmarks",
	        dest="bookmark_file",
	        default=None,
	        help="Specify the bookmark names for each file. The file should be new-line delimited and the number of lines must match the number of input files. If not given, the name of each file will be used as the bookmark name."
	    )
	    # -v and -q share one destination, so whichever comes last wins.
	    parser.add_option("-v", "--verbose",
	                      action="store_true", dest="verbose", default=True,
	                      help="Print detailed output (undoes quiet)")
	    parser.add_option("-q", "--quiet",
	                      action="store_false", dest="verbose", default=True,
	                      help="Do not print detailed output (undoes verbose)")

	    options, args = parser.parse_args()
	    # Merging needs at least two inputs.
	    if len(args) < 2:
	        parser.print_help()
	        sys.exit(1)
	    return options, args

	def main():
	    """Merge the PDF files named on the command line into one bookmarked PDF.

	    The pages of every input file are appended in command-line order, and
	    an outline entry (bookmark) is added that points at the first page of
	    each input. Bookmark titles are read from the ``-b`` file (one title
	    per line) or, when that option is not given, are the input file names.

	    Exits with status 1, after printing a message to stderr, when the
	    bookmark file does not exist, the number of bookmarks does not match
	    the number of inputs, or an input file cannot be opened or is rejected
	    by ``PdfReader``.
	    """
	    # ExitStack owns every input handle so they are closed on all exit paths.
	    from contextlib import ExitStack

	    options, filenames = get_cmdline_arguments()
	    verboseprint = print if options.verbose else lambda *a, **k: None
	    output_pdf_name = options.output_filename + ".pdf"

	    verboseprint(f"Output filename: {output_pdf_name}")
	    verboseprint("Input filenames:")
	    for name in filenames:
	        verboseprint(f"\t{name}")

	    # gather bookmark names
	    if options.bookmark_file:
	        bookmark_path = Path(options.bookmark_file)
	        # Explicit checks rather than ``assert``: assertions are stripped
	        # under ``python -O`` and must not guard user input.
	        if not bookmark_path.exists():
	            eprint(f"Bookmark file '{options.bookmark_file}' does not exist.")
	            sys.exit(1)
	        with bookmark_path.open() as file:
	            bookmarks = file.read().splitlines()

	        if len(bookmarks) != len(filenames):
	            eprint(f"Number of bookmarks in '{bookmark_path}' ({len(bookmarks)}) does not match the number of files ({len(filenames)})")
	            sys.exit(1)

	        verboseprint("Bookmark Names:")
	        for title in bookmarks:
	            verboseprint(f"\t{title}")
	    else:
	        bookmarks = list(filenames)
	        verboseprint("Bookmark Names: Same as filenames")

	    with ExitStack() as input_handles:
	        # get PDF files
	        files_to_merge = []
	        for name in filenames:
	            try:
	                handle = input_handles.enter_context(open(name, "rb"))
	                reader = PdfReader(handle)
	            except PdfReadError:
	                eprint("%s is not a valid PDF file." % name)
	                sys.exit(1)
	            except IOError:
	                eprint("%s could not be found." % name)
	                sys.exit(1)
	            else:
	                files_to_merge.append(reader)

	        # merge page by page
	        output_pdf_stream = PdfWriter()
	        page_index = 0  # index of the next page in the merged output
	        for name, title, reader in zip(filenames, bookmarks, files_to_merge):
	            verboseprint(f"Adding {name} to output")
	            for i, page in enumerate(reader.pages):
	                output_pdf_stream.add_page(page)
	                if i == 0:
	                    # Bookmark the first page contributed by this input.
	                    output_pdf_stream.add_outline_item(str(title), page_index)
	                page_index += 1

	        # create output pdf file; the inputs stay open until the write is done
	        verboseprint("Writing output file...")
	        with open(output_pdf_name, "wb") as output_pdf_file:
	            output_pdf_stream.write(output_pdf_file)

	    print("%s successfully created." % output_pdf_name)


	# Run the merge only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    main()