Skip to content

Instantly share code, notes, and snippets.

@JonathanLalou
Last active April 17, 2025 19:18
Show Gist options
  • Save JonathanLalou/030dd0450aef0c931d1d2a1aae7633c1 to your computer and use it in GitHub Desktop.
Save JonathanLalou/030dd0450aef0c931d1d2a1aae7633c1 to your computer and use it in GitHub Desktop.
Input: a folder of pictures; output: an EPUB file readable in Google Play Books.
import os
import sys
import logging
import zipfile
import uuid
from datetime import datetime
import argparse
from PIL import Image
import xml.etree.ElementTree
from xml.dom import minidom
# @author Jonathan Lalou / https://github.com/JonathanLalou/
# Logging setup: timestamped records on stdout, INFO threshold by default
# (main() lowers this module's logger to DEBUG when --debug is given).
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    stream=sys.stdout,
)
logger = logging.getLogger(__name__)

# Stylesheet written to OEBPS/style.css and linked from every generated page:
# it centres the image and scales it to fit inside the viewport.
CSS_CONTENT = '''
body {
margin: 0;
padding: 0;
display: flex;
justify-content: center;
align-items: center;
min-height: 100vh;
}
img {
max-width: 100%;
max-height: 100vh;
object-fit: contain;
}
'''
def create_container_xml():
    """Build the META-INF/container.xml document.

    The document declares a single rootfile, 'OEBPS/content.opf'.

    Returns:
        The pretty-printed XML as a string.
    """
    logger.debug("Creating container.xml")
    etree = xml.etree.ElementTree

    container_attrs = {
        'version': '1.0',
        'xmlns': 'urn:oasis:names:tc:opendocument:xmlns:container',
    }
    rootfile_attrs = {
        'full-path': 'OEBPS/content.opf',
        'media-type': 'application/oebps-package+xml',
    }

    root = etree.Element('container', container_attrs)
    rootfiles = etree.SubElement(root, 'rootfiles')
    etree.SubElement(rootfiles, 'rootfile', rootfile_attrs)

    xml_content = prettify_xml(root)
    logger.debug("container.xml content:\n" + xml_content)
    return xml_content
def create_content_opf(metadata, spine_items, manifest_items):
    """Create the content.opf package document.

    Args:
        metadata: Dict of book metadata. Keys read here: 'title', 'language',
            'author' (each with a default), and the optional 'publisher',
            'description', 'rights', 'subject', 'isbn', 'series', 'volume',
            'release_date', 'version', 'edition'.
        spine_items: Iterable of manifest ids, in reading order; each becomes
            an <itemref idref="...">.
        manifest_items: Iterable of attribute dicts; each becomes an <item>.

    Returns:
        The pretty-printed XML as a string.
    """
    # Local import: the module only imports the `datetime` class at top level.
    from datetime import timezone

    etree = xml.etree.ElementTree

    logger.debug("Creating content.opf")
    logger.debug(f"Metadata: {metadata}")
    logger.debug(f"Spine items: {spine_items}")
    logger.debug(f"Manifest items: {manifest_items}")

    package = etree.Element('package', {
        'xmlns': 'http://www.idpf.org/2007/opf',
        'xmlns:dc': 'http://purl.org/dc/elements/1.1/',
        'xmlns:dcterms': 'http://purl.org/dc/terms/',
        'xmlns:opf': 'http://www.idpf.org/2007/opf',
        'version': '3.0',
        'unique-identifier': 'bookid'
    })

    # Metadata
    metadata_elem = etree.SubElement(package, 'metadata')

    # Required metadata
    book_id = str(uuid.uuid4())
    etree.SubElement(metadata_elem, 'dc:identifier', {'id': 'bookid'}).text = book_id
    logger.debug(f"Generated book ID: {book_id}")
    etree.SubElement(metadata_elem, 'dc:title').text = metadata.get('title', 'Untitled')
    etree.SubElement(metadata_elem, 'dc:language').text = metadata.get('language', 'en')
    etree.SubElement(metadata_elem, 'dc:creator').text = metadata.get('author', 'Unknown')

    # Required dcterms:modified. The trailing "Z" designates UTC, so the
    # timestamp must be taken in UTC rather than in local time.
    current_time = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
    etree.SubElement(metadata_elem, 'meta', {
        'property': 'dcterms:modified'
    }).text = current_time

    # Cover metadata: points at the manifest item whose id is 'cover-image'.
    etree.SubElement(metadata_elem, 'meta', {
        'name': 'cover',
        'content': 'cover-image'
    })

    # Optional plain Dublin Core elements, emitted only when a value is set.
    for key, tag in (
        ('publisher', 'dc:publisher'),
        ('description', 'dc:description'),
        ('rights', 'dc:rights'),
        ('subject', 'dc:subject'),
    ):
        if metadata.get(key):
            etree.SubElement(metadata_elem, tag).text = metadata[key]

    if metadata.get('isbn'):
        etree.SubElement(metadata_elem, 'dc:identifier', {
            'opf:scheme': 'ISBN'
        }).text = metadata['isbn']

    # Series metadata. group-position only has meaning relative to a
    # collection, so it refines the belongs-to-collection entry by id.
    if metadata.get('series'):
        etree.SubElement(metadata_elem, 'meta', {
            'property': 'belongs-to-collection',
            'id': 'series'
        }).text = metadata['series']
        etree.SubElement(metadata_elem, 'meta', {
            'refines': '#series',
            'property': 'group-position'
        }).text = str(metadata.get('volume', '1'))

    # Release date
    if metadata.get('release_date'):
        etree.SubElement(metadata_elem, 'dc:date').text = metadata['release_date']

    # Version and edition
    if metadata.get('version'):
        etree.SubElement(metadata_elem, 'meta', {
            'property': 'schema:version'
        }).text = metadata['version']
    if metadata.get('edition'):
        etree.SubElement(metadata_elem, 'meta', {
            'property': 'schema:bookEdition'
        }).text = metadata['edition']

    # Manifest: one <item> per attribute dict supplied by the caller.
    manifest = etree.SubElement(package, 'manifest')
    for item in manifest_items:
        etree.SubElement(manifest, 'item', item)

    # Spine: reading order, referencing manifest ids.
    spine = etree.SubElement(package, 'spine')
    for item in spine_items:
        etree.SubElement(spine, 'itemref', {'idref': item})

    xml_content = prettify_xml(package)
    logger.debug("content.opf content:\n" + xml_content)
    return xml_content
def create_toc_ncx(metadata, nav_points):
    """Build the toc.ncx navigation document.

    Args:
        metadata: Book metadata dict; only 'title' is read (default
            'Untitled').
        nav_points: Iterable of (id, label, src) tuples. playOrder is assigned
            from the iteration order, starting at 1.

    Returns:
        The pretty-printed XML as a string.
    """
    etree = xml.etree.ElementTree

    logger.debug("Creating toc.ncx")
    logger.debug(f"Navigation points: {nav_points}")

    root = etree.Element('ncx', {
        'xmlns': 'http://www.daisy.org/z3986/2005/ncx/',
        'version': '2005-1'
    })

    # NOTE(review): this UUID is generated independently of the identifier
    # written by create_content_opf, so the two values differ.
    book_id = str(uuid.uuid4())
    logger.debug(f"Generated NCX book ID: {book_id}")

    head = etree.SubElement(root, 'head')
    head_entries = (
        ('dtb:uid', book_id),
        ('dtb:depth', '1'),
        ('dtb:totalPageCount', '0'),
        ('dtb:maxPageNumber', '0'),
    )
    for meta_name, meta_content in head_entries:
        etree.SubElement(head, 'meta', {'name': meta_name, 'content': meta_content})

    title_holder = etree.SubElement(root, 'docTitle')
    title_text = etree.SubElement(title_holder, 'text')
    title_text.text = metadata.get('title', 'Untitled')

    nav_map = etree.SubElement(root, 'navMap')
    play_order = 0
    for point_id, label, src in nav_points:
        play_order += 1
        point = etree.SubElement(
            nav_map, 'navPoint', {'id': point_id, 'playOrder': str(play_order)}
        )
        label_elem = etree.SubElement(point, 'navLabel')
        etree.SubElement(label_elem, 'text').text = label
        etree.SubElement(point, 'content', {'src': src})

    xml_content = prettify_xml(root)
    logger.debug("toc.ncx content:\n" + xml_content)
    return xml_content
def create_nav_xhtml(metadata, nav_points):
    """Build nav.xhtml, the XHTML table of contents.

    Args:
        metadata: Book metadata dict; not read by this function.
        nav_points: Iterable of (id, label, src) tuples; only label and src
            are used, one list entry per tuple.

    Returns:
        The pretty-printed XHTML as a string.
    """
    logger.debug("Creating nav.xhtml")
    etree = xml.etree.ElementTree

    root = etree.Element('html', {
        'xmlns': 'http://www.w3.org/1999/xhtml',
        'xmlns:epub': 'http://www.idpf.org/2007/ops'
    })

    head = etree.SubElement(root, 'head')
    page_title = etree.SubElement(head, 'title')
    page_title.text = 'Table of Contents'

    body = etree.SubElement(root, 'body')
    toc_nav = etree.SubElement(body, 'nav', {'epub:type': 'toc'})
    entries = etree.SubElement(toc_nav, 'ol')
    for _point_id, label, src in nav_points:
        entry = etree.SubElement(entries, 'li')
        link = etree.SubElement(entry, 'a', {'href': src})
        link.text = label

    xml_content = prettify_xml(root)
    logger.debug("nav.xhtml content:\n" + xml_content)
    return xml_content
def create_page_xhtml(page_number, image_file):
    """Build the XHTML page that displays a single image.

    Args:
        page_number: Number used in the page title and the image alt text.
        image_file: Image file name; referenced as 'images/<image_file>'.

    Returns:
        The pretty-printed XHTML as a string.
    """
    logger.debug(f"Creating page {page_number} for image {image_file}")
    etree = xml.etree.ElementTree

    root = etree.Element('html', {
        'xmlns': 'http://www.w3.org/1999/xhtml',
        'xmlns:epub': 'http://www.idpf.org/2007/ops'
    })

    head = etree.SubElement(root, 'head')
    page_title = etree.SubElement(head, 'title')
    page_title.text = f'Page {page_number}'
    stylesheet_attrs = {
        'rel': 'stylesheet',
        'type': 'text/css',
        'href': 'style.css'
    }
    etree.SubElement(head, 'link', stylesheet_attrs)

    body = etree.SubElement(root, 'body')
    image_attrs = {
        'src': f'images/{image_file}',
        'alt': f'Page {page_number}'
    }
    etree.SubElement(body, 'img', image_attrs)

    xml_content = prettify_xml(root)
    logger.debug(f"Page {page_number} XHTML content:\n" + xml_content)
    return xml_content
def prettify_xml(elem):
    """Serialize an ElementTree element as an indented XML document string.

    The element is serialized to UTF-8 bytes, re-parsed with minidom and
    pretty-printed with a one-space indent.
    """
    serialized = xml.etree.ElementTree.tostring(elem, encoding='utf-8')
    document = minidom.parseString(serialized)
    return document.toprettyxml(indent=" ")
# Media type declared in the manifest for each accepted image extension
# (lower-case, including the dot).
_IMAGE_MEDIA_TYPES = {
    '.png': 'image/png',
    '.jpg': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.gif': 'image/gif',
    '.bmp': 'image/bmp',
}


def _natural_sort_key(name):
    """Return a sort key that orders digit runs numerically ('2' before '10')."""
    import re  # local import: `re` is not among the module-level imports

    # Splitting on a capturing group alternates text/digits/text/..., always
    # starting with a (possibly empty) text token, so odd positions are the
    # digit runs and keys of different names stay type-compatible.
    tokens = re.split(r'(\d+)', name.lower())
    parts = [int(tok) if idx % 2 else tok for idx, tok in enumerate(tokens)]
    # The raw name breaks ties between names that differ only by case.
    return parts, name


def create_epub_from_images(image_folder, output_file, metadata):
    """Build an EPUB in which every image of a folder becomes one page.

    Images are taken from the top level of ``image_folder`` (extensions
    .png, .jpg, .jpeg, .gif, .bmp, case-insensitive) in natural order, so
    'page2' sorts before 'page10'. The first image is also the cover.

    Args:
        image_folder: Directory containing the images.
        output_file: Path of the EPUB file to write.
        metadata: Book metadata dict, forwarded to the document generators.

    Raises:
        SystemExit: With status 1 when the folder contains no image file.
    """
    logger.info(f"Starting EPUB creation from images in {image_folder}")
    logger.info(f"Output file will be: {output_file}")
    logger.info(f"Metadata: {metadata}")

    # Collect the image files. Directories are skipped even when their name
    # ends with an image extension, since they cannot be read as a page.
    image_files = sorted(
        (
            name for name in os.listdir(image_folder)
            if os.path.splitext(name)[1].lower() in _IMAGE_MEDIA_TYPES
            and os.path.isfile(os.path.join(image_folder, name))
        ),
        key=_natural_sort_key,
    )
    logger.info(f"Found {len(image_files)} image files")
    logger.debug(f"Image files: {image_files}")
    if not image_files:
        logger.error("No image files found in the specified folder")
        sys.exit(1)

    # Create ZIP file (EPUB). Everything is written straight into the
    # archive, so no scratch directory is needed on disk.
    logger.info("Creating EPUB file structure")
    with zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED) as epub:
        # The mimetype entry must come first and be stored uncompressed.
        logger.debug("Adding mimetype file (uncompressed)")
        epub.writestr('mimetype', 'application/epub+zip', zipfile.ZIP_STORED)

        logger.debug("Adding container.xml")
        epub.writestr('META-INF/container.xml', create_container_xml())

        logger.debug("Adding style.css")
        epub.writestr('OEBPS/style.css', CSS_CONTENT)

        # Process images and create pages
        logger.info("Processing images and creating pages")
        manifest_items = [
            {'id': 'style', 'href': 'style.css', 'media-type': 'text/css'},
            {'id': 'nav', 'href': 'nav.xhtml', 'media-type': 'application/xhtml+xml', 'properties': 'nav'}
        ]
        spine_items = []
        nav_points = []

        for i, image_file in enumerate(image_files, 1):
            logger.debug(f"Processing image {i:03d}/{len(image_files):03d}: {image_file}")

            # Copy the image bytes into the archive.
            image_path = os.path.join(image_folder, image_file)
            logger.debug(f"Reading image: {image_path}")
            with open(image_path, 'rb') as f:
                image_data = f.read()
            logger.debug(f"Adding image to EPUB: OEBPS/images/{image_file}")
            epub.writestr(f'OEBPS/images/{image_file}', image_data)

            # Declare the image in the manifest with the media type that
            # matches its extension.
            extension = os.path.splitext(image_file)[1].lower()
            image_item = {
                'id': f'image_{i:03d}',
                'href': f'images/{image_file}',
                'media-type': _IMAGE_MEDIA_TYPES[extension],
            }
            if i == 1:
                # The first image is the cover: the id is what the
                # <meta name="cover"> entry of content.opf points at, and the
                # property is the EPUB 3 way of flagging the cover image.
                image_item['id'] = 'cover-image'
                image_item['properties'] = 'cover-image'
            manifest_items.append(image_item)

            # Create page XHTML
            page_id = f'page_{i:03d}'
            logger.debug(f"Creating page XHTML: {page_id}.xhtml")
            page_content = create_page_xhtml(i, image_file)
            epub.writestr(f'OEBPS/{page_id}.xhtml', page_content)

            # Add to manifest and spine
            manifest_items.append({
                'id': page_id,
                'href': f'{page_id}.xhtml',
                'media-type': 'application/xhtml+xml'
            })
            spine_items.append(page_id)

            # Add to navigation points
            nav_points.append((
                f'navpoint-{i:03d}',
                'Cover' if i == 1 else f'Page {i:03d}',
                f'{page_id}.xhtml'
            ))

        # The package and navigation documents are written last because they
        # list every page collected above.
        logger.debug("Creating content.opf")
        epub.writestr('OEBPS/content.opf', create_content_opf(metadata, spine_items, manifest_items))

        logger.debug("Creating toc.ncx")
        epub.writestr('OEBPS/toc.ncx', create_toc_ncx(metadata, nav_points))

        logger.debug("Creating nav.xhtml")
        epub.writestr('OEBPS/nav.xhtml', create_nav_xhtml(metadata, nav_points))

    logger.info(f"Successfully created EPUB file: {output_file}")
    logger.info("EPUB structure:")
    logger.info(" mimetype")
    logger.info(" META-INF/container.xml")
    logger.info(" OEBPS/")
    logger.info(" content.opf")
    logger.info(" toc.ncx")
    logger.info(" nav.xhtml")
    logger.info(" style.css")
    logger.info(" images/")
    for i in range(1, len(image_files) + 1):
        logger.info(f" page_{i:03d}.xhtml")
def generate_default_filename(metadata, image_folder):
    """Generate the default EPUB file name, '<Title>_<volume>_<edition>.epub'.

    Args:
        metadata: Book metadata dict; reads 'title', 'volume' (default '01')
            and 'edition' (default '1').
        image_folder: Image folder path; its directory name (minus the part
            after the last underscore) is the title when none is given.

    Returns:
        The file name, with the title reduced to concatenated capitalized
        words and a purely numeric volume zero-padded to two digits.
    """
    title = metadata.get('title')
    if not title:
        # abspath (rather than normpath) so that "." or "dir/.." resolve to a
        # real directory name instead of "." / "..".
        folder_name = os.path.basename(os.path.abspath(image_folder))
        title = folder_name.rsplit('_', 1)[0] if '_' in folder_name else folder_name

    # Hyphens act as word breaks and quotes are dropped. Path separators and
    # other characters that are illegal in file names are treated as word
    # breaks too, so a title such as "AC/DC" cannot leak a directory
    # component into the generated name.
    cleanup = str.maketrans({
        **{ch: ' ' for ch in '-\\/:*?<>|'},
        '"': None,
        "'": None,
    })
    words = str(title).translate(cleanup).split()
    # Fall back to a placeholder when nothing usable is left of the title.
    title = ''.join(word.capitalize() for word in words) or 'Untitled'

    # Format volume number with 2 digits. str() tolerates numeric values from
    # programmatic callers; isdecimal() only accepts what int() can parse.
    volume = str(metadata.get('volume', '01'))
    if volume.isdecimal():
        volume = f"{int(volume):02d}"

    edition = metadata.get('edition', '1')
    return f"{title}_{volume}_{edition}.epub"
def main():
    """Command-line entry point: parse options, collect metadata, build the EPUB.

    Exits with status 1 when the image folder is missing or is not a
    directory, or when EPUB creation raises an exception.
    """
    # (flag, help text) for every metadata option, in display order. The
    # argparse destination of each flag doubles as its metadata key.
    metadata_options = (
        ('--title', 'Book title'),
        ('--author', 'Book author'),
        ('--series', 'Series name'),
        ('--volume', 'Volume number'),
        ('--release-date', 'Release date (YYYY-MM-DD)'),
        ('--edition', 'Edition number'),
        ('--version', 'Version number'),
        ('--language', 'Book language (default: en)'),
        ('--publisher', 'Publisher name'),
        ('--description', 'Book description'),
        ('--rights', 'Copyright/license information'),
        ('--subject', 'Book subject/category'),
        ('--isbn', 'ISBN number'),
    )

    parser = argparse.ArgumentParser(description='Create an EPUB from a folder of images')
    parser.add_argument('image_folder', help='Folder containing the images')
    parser.add_argument('--output-file', '-o', help='Output EPUB file path (optional)')
    for flag, help_text in metadata_options:
        parser.add_argument(flag, help=help_text)
    parser.add_argument('--debug', action='store_true', help='Enable debug logging')
    args = parser.parse_args()

    if args.debug:
        logger.setLevel(logging.DEBUG)
        logger.info("Debug logging enabled")

    folder = args.image_folder
    if not os.path.exists(folder):
        logger.error(f"Image folder does not exist: {folder}")
        sys.exit(1)
    if not os.path.isdir(folder):
        logger.error(f"Specified path is not a directory: {folder}")
        sys.exit(1)

    # Keep only the options that were actually supplied on the command line.
    metadata = {}
    for flag, _help_text in metadata_options:
        key = flag.lstrip('-').replace('-', '_')
        value = getattr(args, key)
        if value is not None:
            metadata[key] = value

    # Generate output filename if not provided
    if not args.output_file:
        args.output_file = generate_default_filename(metadata, folder)
        logger.info(f"Using default output filename: {args.output_file}")

    try:
        create_epub_from_images(folder, args.output_file, metadata)
        logger.info("EPUB creation completed successfully")
    except Exception as e:
        logger.error(f"EPUB creation failed: {e!s}")
        sys.exit(1)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment