Last active
April 17, 2025 19:18
-
-
Save JonathanLalou/030dd0450aef0c931d1d2a1aae7633c1 to your computer and use it in GitHub Desktop.
Input: a folder of pictures. Output: an EPUB file readable in Google Play Books.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import sys | |
import logging | |
import zipfile | |
import uuid | |
from datetime import datetime | |
import argparse | |
from PIL import Image | |
import xml.etree.ElementTree | |
from xml.dom import minidom | |
# @author Jonathan Lalou / https://github.com/JonathanLalou/
# Configure logging: INFO and above go to stdout, each record prefixed with
# its timestamp and severity name.
logging.basicConfig(
    handlers=[logging.StreamHandler(sys.stdout)],
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
# Module-level logger shared by every function in this file.
logger = logging.getLogger(__name__)
# Stylesheet written to OEBPS/style.css and linked from every page: the body
# is a flex container that centres its content, and the image is scaled down
# (never cropped) to fit inside the viewport.
CSS_CONTENT = '''
body {
    margin: 0;
    padding: 0;
    display: flex;
    justify-content: center;
    align-items: center;
    min-height: 100vh;
}
img {
    max-width: 100%;
    max-height: 100vh;
    object-fit: contain;
}
'''
def create_container_xml():
    """Build the text of META-INF/container.xml.

    The document holds a single <rootfile> entry pointing at
    OEBPS/content.opf.

    Returns:
        The pretty-printed XML document as a string.
    """
    logger.debug("Creating container.xml")
    etree = xml.etree.ElementTree

    root = etree.Element('container', {
        'version': '1.0',
        'xmlns': 'urn:oasis:names:tc:opendocument:xmlns:container',
    })
    rootfile_attrs = {
        'full-path': 'OEBPS/content.opf',
        'media-type': 'application/oebps-package+xml',
    }
    etree.SubElement(etree.SubElement(root, 'rootfiles'), 'rootfile', rootfile_attrs)

    xml_content = prettify_xml(root)
    logger.debug("container.xml content:\n" + xml_content)
    return xml_content
def create_content_opf(metadata, spine_items, manifest_items):
    """Create the content.opf file (the EPUB package document).

    Args:
        metadata: Mapping of book metadata. Every key is optional:
            'identifier' (used as the unique book id; a random UUID is
            generated when absent), 'title', 'language', 'author',
            'publisher', 'description', 'rights', 'subject', 'isbn',
            'series', 'volume', 'release_date', 'version', 'edition'.
        spine_items: Manifest ids of the pages, in reading order.
        manifest_items: One attribute dict per manifest <item>
            (keys such as 'id', 'href', 'media-type', 'properties').

    Returns:
        The pretty-printed package document as a string.
    """
    # Imported locally: the module itself only imports the ``datetime`` class.
    from datetime import timezone

    etree = xml.etree.ElementTree

    logger.debug("Creating content.opf")
    logger.debug(f"Metadata: {metadata}")
    logger.debug(f"Spine items: {spine_items}")
    logger.debug(f"Manifest items: {manifest_items}")

    package = etree.Element('package', {
        'xmlns': 'http://www.idpf.org/2007/opf',
        'xmlns:dc': 'http://purl.org/dc/elements/1.1/',
        'xmlns:dcterms': 'http://purl.org/dc/terms/',
        'xmlns:opf': 'http://www.idpf.org/2007/opf',
        'version': '3.0',
        'unique-identifier': 'bookid',
    })

    # Metadata
    metadata_elem = etree.SubElement(package, 'metadata')

    def add_text(tag, text, attrib=None):
        """Append <tag attrib...>text</tag> to the metadata element."""
        etree.SubElement(metadata_elem, tag, attrib or {}).text = text

    # Required metadata. A caller-supplied identifier lets other documents of
    # the same book (e.g. the NCX) carry the very same id.
    book_id = metadata.get('identifier')
    if book_id:
        logger.debug(f"Using supplied book ID: {book_id}")
    else:
        book_id = str(uuid.uuid4())
        logger.debug(f"Generated book ID: {book_id}")
    add_text('dc:identifier', str(book_id), {'id': 'bookid'})

    # ``or`` (rather than a .get default) also covers keys present but None/empty.
    add_text('dc:title', metadata.get('title') or 'Untitled')
    add_text('dc:language', metadata.get('language') or 'en')
    add_text('dc:creator', metadata.get('author') or 'Unknown')

    # Required dcterms:modified. The trailing 'Z' means UTC, so the timestamp
    # must be taken in UTC, not in local time.
    current_time = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
    add_text('meta', current_time, {'property': 'dcterms:modified'})

    # Cover metadata: points at the manifest item whose id is 'cover-image'.
    etree.SubElement(metadata_elem, 'meta', {
        'name': 'cover',
        'content': 'cover-image',
    })

    # Additional, optional Dublin Core metadata
    for key, tag in (
        ('publisher', 'dc:publisher'),
        ('description', 'dc:description'),
        ('rights', 'dc:rights'),
        ('subject', 'dc:subject'),
    ):
        if metadata.get(key):
            add_text(tag, metadata[key])

    if metadata.get('isbn'):
        # NOTE(review): opf:scheme is an EPUB 2 style attribute; confirm the
        # target validators/readers accept it in a version 3.0 package.
        add_text('dc:identifier', metadata['isbn'], {'opf:scheme': 'ISBN'})

    # Series metadata: group-position is a refinement of the collection it
    # belongs to, so it is linked to it through id/refines.
    if metadata.get('series'):
        add_text('meta', metadata['series'], {
            'property': 'belongs-to-collection',
            'id': 'series',
        })
        add_text('meta', str(metadata.get('volume') or '1'), {
            'property': 'group-position',
            'refines': '#series',
        })

    # Release date
    if metadata.get('release_date'):
        add_text('dc:date', metadata['release_date'])

    # Version and edition
    if metadata.get('version'):
        add_text('meta', metadata['version'], {'property': 'schema:version'})
    if metadata.get('edition'):
        add_text('meta', metadata['edition'], {'property': 'schema:bookEdition'})

    # Manifest
    manifest = etree.SubElement(package, 'manifest')
    for item in manifest_items:
        etree.SubElement(manifest, 'item', item)

    # Spine. When the manifest declares an NCX document, reference it through
    # the spine's ``toc`` attribute so NCX-based readers can find it.
    ncx_id = next(
        (item.get('id') for item in manifest_items
         if item.get('media-type') == 'application/x-dtbncx+xml'),
        None,
    )
    spine = etree.SubElement(package, 'spine', {'toc': ncx_id} if ncx_id else {})
    for item in spine_items:
        etree.SubElement(spine, 'itemref', {'idref': item})

    xml_content = prettify_xml(package)
    logger.debug("content.opf content:\n" + xml_content)
    return xml_content
def create_toc_ncx(metadata, nav_points):
    """Create the toc.ncx file (NCX navigation document).

    Args:
        metadata: Mapping of book metadata. 'title' is used for <docTitle>
            ('Untitled' when missing). 'identifier', when present, is used
            as dtb:uid so the NCX can carry the same id as the package
            document; otherwise a random UUID is generated.
        nav_points: Iterable of (id, label, src) tuples, in reading order.

    Returns:
        The pretty-printed NCX document as a string.
    """
    etree = xml.etree.ElementTree

    logger.debug("Creating toc.ncx")
    logger.debug(f"Navigation points: {nav_points}")

    ncx = etree.Element('ncx', {
        'xmlns': 'http://www.daisy.org/z3986/2005/ncx/',
        'version': '2005-1',
    })

    head = etree.SubElement(ncx, 'head')
    book_id = metadata.get('identifier')
    if book_id:
        logger.debug(f"Using supplied NCX book ID: {book_id}")
    else:
        book_id = str(uuid.uuid4())
        logger.debug(f"Generated NCX book ID: {book_id}")
    for name, content in (
        ('dtb:uid', str(book_id)),
        ('dtb:depth', '1'),
        ('dtb:totalPageCount', '0'),
        ('dtb:maxPageNumber', '0'),
    ):
        etree.SubElement(head, 'meta', {'name': name, 'content': content})

    doc_title = etree.SubElement(ncx, 'docTitle')
    etree.SubElement(doc_title, 'text').text = metadata.get('title') or 'Untitled'

    nav_map = etree.SubElement(ncx, 'navMap')
    # playOrder is 1-based and follows the order of nav_points.
    for play_order, (nav_id, label, src) in enumerate(nav_points, 1):
        nav_point = etree.SubElement(nav_map, 'navPoint', {
            'id': nav_id,
            'playOrder': str(play_order),
        })
        nav_label = etree.SubElement(nav_point, 'navLabel')
        etree.SubElement(nav_label, 'text').text = label
        etree.SubElement(nav_point, 'content', {'src': src})

    xml_content = prettify_xml(ncx)
    logger.debug("toc.ncx content:\n" + xml_content)
    return xml_content
def create_nav_xhtml(metadata, nav_points):
    """Build the nav.xhtml navigation document.

    Args:
        metadata: Accepted for signature symmetry with the other builders;
            not read by this function.
        nav_points: Iterable of (id, label, src) tuples; only the label and
            the src of each entry are used.

    Returns:
        The pretty-printed XHTML document as a string.
    """
    logger.debug("Creating nav.xhtml")
    etree = xml.etree.ElementTree

    document = etree.Element('html', {
        'xmlns': 'http://www.w3.org/1999/xhtml',
        'xmlns:epub': 'http://www.idpf.org/2007/ops',
    })

    title = etree.SubElement(etree.SubElement(document, 'head'), 'title')
    title.text = 'Table of Contents'

    toc_nav = etree.SubElement(
        etree.SubElement(document, 'body'), 'nav', {'epub:type': 'toc'}
    )
    entries = etree.SubElement(toc_nav, 'ol')
    # One <li><a href=src>label</a></li> per navigation point.
    for _nav_id, text, target in nav_points:
        link = etree.SubElement(etree.SubElement(entries, 'li'), 'a', {'href': target})
        link.text = text

    xml_content = prettify_xml(document)
    logger.debug("nav.xhtml content:\n" + xml_content)
    return xml_content
def create_page_xhtml(page_number, image_file):
    """Create an XHTML page for an image.

    Args:
        page_number: Page number shown in the <title> and in the image's
            alt text.
        image_file: File name of the image, relative to the ``images/``
            directory next to the page. The raw name is expected; it is
            percent-encoded here.

    Returns:
        The pretty-printed XHTML document as a string.
    """
    # Imported locally: urllib is not among the module-level imports.
    from urllib.parse import quote

    etree = xml.etree.ElementTree

    logger.debug(f"Creating page {page_number} for image {image_file}")
    html = etree.Element('html', {
        'xmlns': 'http://www.w3.org/1999/xhtml',
        'xmlns:epub': 'http://www.idpf.org/2007/ops',
    })

    head = etree.SubElement(html, 'head')
    etree.SubElement(head, 'title').text = f'Page {page_number}'
    etree.SubElement(head, 'link', {
        'rel': 'stylesheet',
        'type': 'text/css',
        'href': 'style.css',
    })

    body = etree.SubElement(html, 'body')
    etree.SubElement(body, 'img', {
        # src is a URL reference, not a file name: spaces, '#', '%' and
        # non-ASCII characters must be percent-encoded to resolve correctly.
        'src': f'images/{quote(image_file)}',
        'alt': f'Page {page_number}',
    })

    xml_content = prettify_xml(html)
    logger.debug(f"Page {page_number} XHTML content:\n" + xml_content)
    return xml_content
def prettify_xml(elem):
    """Serialize an ElementTree element as an indented XML document string.

    The element is first serialized to UTF-8 bytes, then re-parsed with
    minidom so that its pretty-printer can be used.
    """
    serialized = xml.etree.ElementTree.tostring(elem, encoding='utf-8')
    return minidom.parseString(serialized).toprettyxml(indent=" ")
# Media type declared in the manifest for each accepted image extension.
_IMAGE_MEDIA_TYPES = {
    '.png': 'image/png',
    '.jpg': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.gif': 'image/gif',
    '.bmp': 'image/bmp',
}


def _natural_sort_key(filename):
    """Return a sort key ordering embedded numbers numerically (page2 < page10)."""
    import re

    # With a capturing group, re.split always alternates text / digits / text,
    # so keys of different names compare str-to-str and int-to-int.
    parts = re.split(r'(\d+)', filename)
    numeric_aware = [int(part) if index % 2 else part for index, part in enumerate(parts)]
    # The raw name breaks ties such as 'a1' vs 'a01' deterministically.
    return numeric_aware, filename


def _image_media_type(filename):
    """Return the manifest media type matching the file's extension."""
    extension = os.path.splitext(filename)[1].lower()
    return _IMAGE_MEDIA_TYPES.get(extension, 'image/png')


def create_epub_from_images(image_folder, output_file, metadata):
    """Build an EPUB in which every image of a folder becomes one page.

    Images (.png, .jpg, .jpeg, .gif, .bmp) are taken from ``image_folder``
    in natural file-name order; the first one is also declared as the cover.
    The archive contains the mimetype entry, META-INF/container.xml and,
    under OEBPS/, content.opf, toc.ncx, nav.xhtml, style.css, the images and
    one XHTML page per image.

    Args:
        image_folder: Directory containing the images.
        output_file: Path of the EPUB file to write (overwritten if present).
        metadata: Mapping of book metadata handed to the document builders.
            The caller's mapping is not modified.

    Exits the process with status 1 when the folder contains no image.
    """
    # Imported locally: urllib is not among the module-level imports.
    from urllib.parse import quote

    logger.info(f"Starting EPUB creation from images in {image_folder}")
    logger.info(f"Output file will be: {output_file}")
    logger.info(f"Metadata: {metadata}")

    # Get all image files (regular files only), pages in natural order.
    extensions = tuple(_IMAGE_MEDIA_TYPES)
    image_files = sorted(
        (
            name for name in os.listdir(image_folder)
            if name.lower().endswith(extensions)
            and os.path.isfile(os.path.join(image_folder, name))
        ),
        key=_natural_sort_key,
    )
    logger.info(f"Found {len(image_files)} image files")
    logger.debug(f"Image files: {image_files}")

    if not image_files:
        logger.error("No image files found in the specified folder")
        sys.exit(1)

    # Offer one identifier, under the 'identifier' key, to every document
    # builder, so that builders honouring that key emit the same book id.
    # Work on a copy so the caller's mapping stays untouched.
    doc_metadata = dict(metadata)
    doc_metadata.setdefault('identifier', str(uuid.uuid4()))

    # Create ZIP file (EPUB). Everything is written straight into the
    # archive, so no scratch directory is needed on disk.
    logger.info("Creating EPUB file structure")
    with zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED) as epub:
        # Add mimetype (must be first, uncompressed)
        logger.debug("Adding mimetype file (uncompressed)")
        epub.writestr('mimetype', 'application/epub+zip', zipfile.ZIP_STORED)

        logger.debug("Adding container.xml")
        epub.writestr('META-INF/container.xml', create_container_xml())

        # Add CSS
        logger.debug("Adding style.css")
        epub.writestr('OEBPS/style.css', CSS_CONTENT)

        # Process images and create pages
        logger.info("Processing images and creating pages")
        manifest_items = [
            {'id': 'style', 'href': 'style.css', 'media-type': 'text/css'},
            {'id': 'nav', 'href': 'nav.xhtml', 'media-type': 'application/xhtml+xml', 'properties': 'nav'},
            # toc.ncx is written below, so it has to be declared as well.
            {'id': 'ncx', 'href': 'toc.ncx', 'media-type': 'application/x-dtbncx+xml'},
        ]
        spine_items = []
        nav_points = []

        for i, image_file in enumerate(image_files, 1):
            logger.debug(f"Processing image {i:03d}/{len(image_files):03d}: {image_file}")

            # Copy the image into the archive
            image_path = os.path.join(image_folder, image_file)
            logger.debug(f"Reading image: {image_path}")
            with open(image_path, 'rb') as f:
                image_data = f.read()
            logger.debug(f"Adding image to EPUB: OEBPS/images/{image_file}")
            epub.writestr(f'OEBPS/images/{image_file}', image_data)

            # Add image to manifest. The archive entry keeps the raw name;
            # the href is a URL reference and is percent-encoded.
            image_item = {
                'id': f'image_{i:03d}',
                'href': f'images/{quote(image_file)}',
                'media-type': _image_media_type(image_file),
            }
            if i == 1:
                # Special id (target of the <meta name="cover">) plus the
                # EPUB 3 manifest property marking the cover image.
                image_item['id'] = 'cover-image'
                image_item['properties'] = 'cover-image'
            manifest_items.append(image_item)

            # Create page XHTML
            page_id = f'page_{i:03d}'
            logger.debug(f"Creating page XHTML: {page_id}.xhtml")
            page_content = create_page_xhtml(i, image_file)
            epub.writestr(f'OEBPS/{page_id}.xhtml', page_content)

            # Add to manifest and spine
            manifest_items.append({
                'id': page_id,
                'href': f'{page_id}.xhtml',
                'media-type': 'application/xhtml+xml',
            })
            spine_items.append(page_id)

            # Add to navigation points
            nav_points.append((
                f'navpoint-{i:03d}',
                'Cover' if i == 1 else f'Page {i:03d}',
                f'{page_id}.xhtml',
            ))

        # Create content.opf
        logger.debug("Creating content.opf")
        epub.writestr('OEBPS/content.opf', create_content_opf(doc_metadata, spine_items, manifest_items))

        # Create toc.ncx
        logger.debug("Creating toc.ncx")
        epub.writestr('OEBPS/toc.ncx', create_toc_ncx(doc_metadata, nav_points))

        # Create nav.xhtml
        logger.debug("Creating nav.xhtml")
        epub.writestr('OEBPS/nav.xhtml', create_nav_xhtml(doc_metadata, nav_points))

    logger.info(f"Successfully created EPUB file: {output_file}")
    logger.info("EPUB structure:")
    logger.info(" mimetype")
    logger.info(" META-INF/container.xml")
    logger.info(" OEBPS/")
    logger.info(" content.opf")
    logger.info(" toc.ncx")
    logger.info(" nav.xhtml")
    logger.info(" style.css")
    logger.info(" images/")
    for i in range(1, len(image_files) + 1):
        logger.info(f" page_{i:03d}.xhtml")
def generate_default_filename(metadata, image_folder):
    """Generate default EPUB filename based on metadata.

    The result has the form ``<Title>_<volume>_<edition>.epub``.

    Args:
        metadata: Mapping that may contain 'title', 'volume' and 'edition'.
        image_folder: Image folder path. When no title is given, the folder
            name (minus its last ``_suffix``, if any) is used as the title.

    Returns:
        A bare file name with no directory part. Characters that are path
        separators or are not allowed in file names are removed.
    """
    title = metadata.get('title')
    if not title:
        # abspath (rather than normpath) so that '.' or '..' resolve to a
        # real directory name instead of yielding a title such as '.'.
        folder_name = os.path.basename(os.path.abspath(image_folder))
        title = folder_name.rsplit('_', 1)[0] if '_' in folder_name else folder_name

    # Characters that would turn the name into a path or make it invalid on
    # common file systems.
    unsafe = '<>:/\\|?*'
    # For the title: hyphens and unsafe characters separate words, quotes vanish.
    word_table = str.maketrans('-' + unsafe, ' ' * (len(unsafe) + 1), '"\'')
    # For volume/edition: unsafe characters and double quotes are dropped.
    strip_table = str.maketrans('', '', unsafe + '"')

    # Format title: CamelCase the words; never return an empty title.
    words = str(title).translate(word_table).split()
    title = ''.join(word.capitalize() for word in words) or 'Untitled'

    # Format volume number with 2 digits. isdecimal() only accepts what
    # int() can parse, unlike isdigit().
    volume = str(metadata.get('volume') or '01').translate(strip_table)
    if volume.isdecimal():
        volume = f"{int(volume):02d}"

    # Get edition number
    edition = str(metadata.get('edition') or '1').translate(strip_table)

    return f"{title}_{volume}_{edition}.epub"
def main(argv=None):
    """Command-line entry point: parse arguments and build the EPUB.

    Args:
        argv: Argument list to parse; ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Exits with status 1 when the image folder is missing or is not a
    directory, or when EPUB creation raises an exception.
    """
    parser = argparse.ArgumentParser(description='Create an EPUB from a folder of images')
    parser.add_argument('image_folder', help='Folder containing the images')
    parser.add_argument('--output-file', '-o', help='Output EPUB file path (optional)')
    parser.add_argument('--title', help='Book title')
    parser.add_argument('--author', help='Book author')
    parser.add_argument('--series', help='Series name')
    parser.add_argument('--volume', help='Volume number')
    parser.add_argument('--release-date', help='Release date (YYYY-MM-DD)')
    parser.add_argument('--edition', help='Edition number')
    parser.add_argument('--version', help='Version number')
    parser.add_argument('--language', help='Book language (default: en)')
    parser.add_argument('--publisher', help='Publisher name')
    parser.add_argument('--description', help='Book description')
    parser.add_argument('--rights', help='Copyright/license information')
    parser.add_argument('--subject', help='Book subject/category')
    parser.add_argument('--isbn', help='ISBN number')
    parser.add_argument('--debug', action='store_true', help='Enable debug logging')
    args = parser.parse_args(argv)

    if args.debug:
        logger.setLevel(logging.DEBUG)
        logger.info("Debug logging enabled")

    if not os.path.exists(args.image_folder):
        logger.error(f"Image folder does not exist: {args.image_folder}")
        sys.exit(1)
    if not os.path.isdir(args.image_folder):
        logger.error(f"Specified path is not a directory: {args.image_folder}")
        sys.exit(1)

    metadata = {
        'title': args.title,
        'author': args.author,
        'series': args.series,
        'volume': args.volume,
        'release_date': args.release_date,
        'edition': args.edition,
        'version': args.version,
        'language': args.language,
        'publisher': args.publisher,
        'description': args.description,
        'rights': args.rights,
        'subject': args.subject,
        'isbn': args.isbn,
    }
    # Remove None values from metadata, so that options not given on the
    # command line fall back to the defaults of the code reading them.
    metadata = {k: v for k, v in metadata.items() if v is not None}

    # Generate output filename if not provided
    if not args.output_file:
        args.output_file = generate_default_filename(metadata, args.image_folder)
        logger.info(f"Using default output filename: {args.output_file}")

    # Top-level boundary: report any failure and turn it into exit status 1.
    try:
        create_epub_from_images(args.image_folder, args.output_file, metadata)
        logger.info("EPUB creation completed successfully")
    except Exception as e:
        # With --debug the traceback is logged as well, instead of being lost.
        logger.error(f"EPUB creation failed: {str(e)}", exc_info=args.debug)
        sys.exit(1)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment