Created
September 11, 2022 16:58
-
-
Save derlin/6ac4330c31ff849ea6fb779ceecbac82 to your computer and use it in GitHub Desktop.
Generate PDFs out of image directories (for mangas mostly)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Generate PDFs out of directories with images.

If the directories have a sub-structure, the --levels argument can be used.
For example, given the following structure:

├─ MyManga_volumes
│   └── Vol1
│       ├── 001.png
│       ├── 002.jpeg
│       ├── ...
│   └── Vol2
│       ├── ...
├─ MyManga_chaps
│   └── Vol1
│       └── chap1
│           ├── 001.png
│           ├── 002.jpeg
│       └── chap2
│           ├── 001.jpg
│   └── Vol2
│       ├── ...

All the following commands will generate PDFs per volume:

    python scripts.py -i MyManga_volumes -l 1
    python scripts.py -i . -l 2
    python scripts.py -i MyManga_volumes/Vol1 -l 0
    python scripts.py -i MyManga_chaps/Vol1 -l 0

Supported image extensions are .jpg, .jpeg and .png.
""" | |
from PIL import Image | |
from glob import glob | |
from argparse import ArgumentParser | |
import logging | |
import os | |
# Above this many pages the user is asked to confirm before a PDF is generated.
MAX_PAGES = 600

# File extensions (without the leading dot) that are treated as images.
SUPPORTED_EXTENSIONS = {"jpeg", "jpg", "png"}

# Module-wide logger; the "convert" name is the one --debug switches to DEBUG.
logger = logging.getLogger("convert")
def listdir(dir):
    """Return the entries of `dir`, each one joined onto `dir` itself.

    Unlike `os.listdir`, the returned names can be used directly as paths
    relative to the same base as `dir`. The order is whatever `os.listdir`
    yields.
    """
    entries = []
    for entry in os.listdir(dir):
        entries.append(os.path.join(dir, entry))
    return entries
def load_image(path):
    """Open the image stored at `path` and return it in RGB mode.

    Images that are not already RGB (e.g. palette, grayscale or RGBA) are
    converted, because every page handed to the PDF writer is expected to
    share the same mode. Images that are already RGB are returned as opened,
    i.e. still lazily loaded by Pillow.

    Args:
        path: path of the image file to open.

    Returns:
        A `PIL.Image.Image` whose mode is 'RGB'.
    """
    image = Image.open(path)
    if image.mode == 'RGB':
        return image

    logger.debug(f"Found image with non-RGB channel: '{path}'. Converting to RGB.")
    return image.convert('RGB')
def convert_directory_to_pdf(input_dir, pdf_file, min_pages=40, dry_run=False):
    """Bundle every supported image found under `input_dir` into one PDF.

    Images are searched recursively and ordered by their sorted path. The
    directory is skipped when it holds fewer than `min_pages` images (or none
    at all), when `pdf_file` already exists, or when the user declines the
    confirmation asked for PDFs larger than `MAX_PAGES` pages.

    Args:
        input_dir: directory to scan for images.
        pdf_file: path of the PDF to write.
        min_pages: minimum number of images required to generate a PDF.
        dry_run: when true, only log what would be generated.
    """
    # Local import: only `glob.glob` is imported at file level.
    from glob import escape

    # Escape the directory so names such as "[Group] Title" are taken
    # literally instead of being interpreted as glob character classes.
    candidates = glob(f"{escape(input_dir)}/**/*.*", recursive=True)
    image_paths = sorted(
        path
        for path in candidates
        if os.path.isfile(path)
        # Compare lower-cased so "001.JPG" is picked up as well.
        and os.path.splitext(path)[1][1:].lower() in SUPPORTED_EXTENSIONS
    )
    page_count = len(image_paths)

    # The explicit zero check guards `images[0]` below when min_pages <= 0.
    if page_count == 0 or page_count < min_pages:
        logger.debug(f" Not enough images found in directory '{input_dir}' ({page_count}). Skipping.")
        return

    # Checked before prompting: no point asking about a PDF that is skipped.
    if os.path.exists(pdf_file):
        logger.info(f"{pdf_file} already exists and overwrite is False. Skipping.")
        return

    if page_count > MAX_PAGES:
        answer = input(f"Large PDF detected: {page_count} pages. Do You Want To Continue? [y/n] ")
        if answer.strip().lower() != "y":
            logger.debug("User didn't confirm. Skipping.")
            return

    if dry_run:
        logger.info(f"Would generate pdf '{pdf_file}': {page_count} pages (dry run).")
        return

    images = []
    try:
        for path in image_paths:
            image = load_image(path)
            # Decode right away so Pillow can release the underlying file
            # handle; otherwise one descriptor per page stays open until save.
            image.load()
            images.append(image)
        images[0].save(
            pdf_file, "PDF", resolution=100.0, save_all=True, append_images=images[1:]
        )
    finally:
        for image in images:
            image.close()

    logger.info(f"Generated pdf '{pdf_file}': {page_count} pages.")
def find_dirs_at_level(start, level):
    """Return the directories found `level` levels below `start`.

    Level 0 is `start` itself, level 1 its direct sub-directories, level 2
    their sub-directories, and so on. Non-directory entries are ignored.
    """
    if level == 0:
        return [start]

    found = []
    for entry in listdir(start):
        if os.path.isdir(entry):
            # Each sub-directory contributes whatever sits one level less
            # below it (itself when the remaining level reaches 0).
            found.extend(find_dirs_at_level(entry, level - 1))
    return found
def build_pdf_name(directory, mode="directory"):
    """Return the PDF file name (never a path) to use for `directory`.

    Args:
        directory: directory the PDF is generated from.
        mode: "directory" to use only the last path component, "path" to
            join every component of the path with '_'.

    Returns:
        A file name ending in ".pdf" that contains no path separator.
    """
    if mode == "path":
        # Normalise first so "./a//b/" and "a/b" produce the same name, and
        # drop the drive (Windows) since ':' is not valid in a file name.
        _, tail = os.path.splitdrive(os.path.normpath(directory))
        parts = [
            part
            for part in tail.split(os.sep)
            if part not in ("", os.curdir, os.pardir)
        ]
        if parts:
            return "_".join(parts) + ".pdf"

    # abspath resolves "." and trailing separators to a real directory name.
    base = os.path.basename(os.path.abspath(directory))
    return (base or "root") + ".pdf"


def main():
    """Parse the command line and generate one PDF per matching directory."""
    parser = ArgumentParser(description="Generate PDFs out of directories of images.")
    parser.add_argument("-i", "--input-dir", default=".", help="Base directory to start looking for mangas.")
    parser.add_argument("-o", "--output-dir", default="out", help="Where to save the generated pdfs.")
    parser.add_argument("-l", "--levels", type=int, default=0, help="Number of subdirectories (e.g. volumes subdirectory).")
    parser.add_argument("-n", "--name", choices=["directory", "path"], default="directory",
                        help="Use the full path (segments joined with '_') or only the last directory for PDF names.")
    parser.add_argument("--min-pages", type=int, default=40, help="minimum images to consider the directory a manga.")
    parser.add_argument("--dry-run", action="store_true", help="Only print what will happen, don't actually generate the PDFs.")
    parser.add_argument("--debug", action="store_true", help="Turn on debug mode.")
    args = parser.parse_args()

    if args.levels < 0:
        parser.error("--levels must be greater than or equal to 0.")
    if not os.path.isdir(args.input_dir):
        parser.error(f"--input-dir '{args.input_dir}' is not a directory.")

    logging.basicConfig(format="%(levelname)-8s %(message)s", level=logging.INFO)
    if args.debug:
        logging.getLogger('convert').setLevel(logging.DEBUG)

    os.makedirs(args.output_dir, exist_ok=True)
    for directory in sorted(find_dirs_at_level(args.input_dir, args.levels)):
        logger.debug(f"Visiting directory {directory}")
        pdf_file = os.path.join(args.output_dir, build_pdf_name(directory, args.name))
        convert_directory_to_pdf(directory, pdf_file, args.min_pages, args.dry_run)


if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Pillow>=9.2.0 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment