Skip to content

Instantly share code, notes, and snippets.

@jrwrigh
Last active July 10, 2025 16:06
Show Gist options
  • Save jrwrigh/563e27dfc6cfdfa793a27733a75b846a to your computer and use it in GitHub Desktop.
Save jrwrigh/563e27dfc6cfdfa793a27733a75b846a to your computer and use it in GitHub Desktop.
Pdfmerger updated for python3 usage
#! /usr/bin/env python
# Original author Nicholas Kim, modified by Yan Pashkovsky
# New license - GPL v3
import sys
import time
from pathlib import Path
# from PyPDF2 import PdfReader, PdfWriter
# try:
# from PyPDF2.utils import PdfReadError
# except ImportError:
# from PyPDF2._reader import PdfReadError
from pypdf import PdfReader, PdfWriter
try:
from pypdf.utils import PdfReadError
except ImportError:
from pypdf._reader import PdfReadError
def eprint(*args, **kwargs):
    """Print to stderr.

    Accepts the same positional and keyword arguments as the built-in
    ``print`` (except ``file``, which is fixed to ``sys.stderr``).

    Taken from https://stackoverflow.com/a/14981125/7564988
    """
    print(*args, file=sys.stderr, **kwargs)
def get_cmdline_arguments(argv=None):
    """Retrieve command line arguments.

    Args:
        argv: Optional list of argument strings to parse instead of
            ``sys.argv[1:]``. ``None`` (the default) parses the real
            command line.

    Returns:
        A ``(options, args)`` tuple as produced by ``OptionParser.parse_args``:
        ``options`` carries ``output_filename``, ``bookmark_file`` and
        ``verbose``; ``args`` is the list of input file names.

    Raises:
        SystemExit: with status 1 (after printing the help text) when fewer
            than two input files are given.
    """
    from optparse import OptionParser

    usage_string = "%prog [-v] [-o output_name] [-b bookmarks_file] file1, file2 [, ...]"
    parser = OptionParser(usage_string)
    parser.add_option(
        "-o", "--output",
        dest="output_filename",
        # The timestamp is taken once, when the parser is built.
        default=time.strftime("output_%Y%m%d_%H%M%S"),
        help="Specify output filename (exclude .pdf extension); default is current date/time stamp",
    )
    parser.add_option(
        "-b", "--bookmarks",
        dest="bookmark_file",
        default=None,
        help="Specify the bookmark names for each file. The file should be new-line delimited and the number of lines must match the number of input files. If not given, the name of each file will be used as the bookmark name.",
    )
    # -v and -q write to the same destination, so the last one given wins.
    parser.add_option(
        "-v", "--verbose",
        action="store_true", dest="verbose", default=True,
        help="Print detailed output (undoes quiet)",
    )
    parser.add_option(
        "-q", "--quiet",
        action="store_false", dest="verbose", default=True,
        help="Do not print detailed output (undoes verbose)",
    )

    options, args = parser.parse_args(argv)
    # Merging needs at least two inputs.
    if len(args) < 2:
        parser.print_help()
        sys.exit(1)
    return options, args
def main():
    """Merge the PDF files named on the command line into one bookmarked PDF.

    Each input file contributes all of its pages, in order, and one outline
    (bookmark) entry pointing at its first page. Bookmark titles come from
    the ``-b`` file when given, otherwise from the input file names.

    Raises:
        SystemExit: with status 1 when the bookmark file is missing, its line
            count does not match the number of inputs, or an input file
            cannot be opened or parsed as a PDF.
    """
    from contextlib import ExitStack

    options, filenames = get_cmdline_arguments()
    verboseprint = print if options.verbose else lambda *a, **k: None
    output_pdf_name = options.output_filename + ".pdf"

    verboseprint(f"Output filename: {output_pdf_name}")
    verboseprint("Input filenames:")
    for f in filenames:
        verboseprint(f"\t{f}")

    # gather bookmark names
    if options.bookmark_file:
        bookmark_path = Path(options.bookmark_file)
        # Explicit checks rather than assert: asserts vanish under ``python -O``.
        if not bookmark_path.exists():
            eprint(f"Bookmark file '{options.bookmark_file}' does not exist.")
            sys.exit(1)
        with bookmark_path.open() as file:
            bookmarks = file.read().splitlines()
        if len(bookmarks) != len(filenames):
            eprint(f"Number of bookmarks in '{bookmark_path}' ({len(bookmarks)}) does not match the number of files ({len(filenames)})")
            sys.exit(1)
        verboseprint("Bookmark Names:")
        for b in bookmarks:
            verboseprint(f"\t{b}")
    else:
        bookmarks = list(filenames)
        verboseprint("Bookmark Names: Same as filenames")

    # The input handles stay open until the output has been written, because
    # the readers are still backed by them; the ExitStack then closes them
    # all, including on the early-exit paths.
    with ExitStack() as open_inputs:
        # get PDF files
        files_to_merge = []
        for f in filenames:
            try:
                handle = open_inputs.enter_context(open(f, "rb"))
                next_pdf_file = PdfReader(handle)
            except PdfReadError:
                eprint("%s is not a valid PDF file." % f)
                sys.exit(1)
            except IOError:
                eprint("%s could not be found." % f)
                sys.exit(1)
            else:
                files_to_merge.append(next_pdf_file)

        # merge page by page
        output_pdf_stream = PdfWriter()
        pages_written = 0  # index in the output of the next page to be added
        for filename, bookmark, reader in zip(filenames, bookmarks, files_to_merge):
            verboseprint(f"Adding {filename} to output")
            for page_index, page in enumerate(reader.pages):
                output_pdf_stream.add_page(page)
                if page_index == 0:
                    # Bookmark the first page of each input; an input with no
                    # pages therefore gets no bookmark.
                    output_pdf_stream.add_outline_item(str(bookmark), pages_written)
                pages_written += 1

        # create output pdf file
        verboseprint("Writing output file...")
        # ``with`` avoids the unbound-name error a try/finally close() would
        # hit if open() itself failed.
        with open(output_pdf_name, "wb") as output_pdf_file:
            output_pdf_stream.write(output_pdf_file)

    print("%s successfully created." % output_pdf_name)
# Run the merge only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
@jrwrigh
Copy link
Author

jrwrigh commented Dec 11, 2024

Latest revision works with PyPDF2 version 3.0.1.

NOTE: PyPDF2 is deprecated in favor of pypdf, which actually gets regular updates. This still works for now, so I'm not going to fix what isn't broken.

@jrwrigh
Copy link
Author

jrwrigh commented Dec 11, 2024

Compatibility with mainline pypdf is as simple as replacing PyPDF2 with pypdf in the import statements; everything else should work.

@jrwrigh
Copy link
Author

jrwrigh commented Jul 10, 2025

Latest update adds the ability to customize the bookmark labels and also adds more verbose output (which can optionally be disabled).

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment