Created
May 11, 2025 15:21
-
-
Save dino-/1d8b9013ae98bc7ff2b7204a40c0f2f8 to your computer and use it in GitHub Desktop.
Script for extracting the cover art image from EPUB2/3 files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# Script for extracting the cover art image from EPUB2/3 files.
#
# Usage: pass the path to an EPUB book file as the first argument.
#
# Note: this script requires the following tools to be available:
#
#   program   package (Arch Linux)
#   ----------------------------------------------------------------
#   display   imagemagick  (optional, if you want to view the image)
#   unzip     unzip
#   xq        yq

# Set to true to make log() print diagnostic messages.
debug=false
# debug=true
#######################################
# Print a diagnostic message when debugging is enabled.
# Globals:   debug (read) - messages are printed only when it is "true"
# Arguments: the words of the message; they are joined with single spaces
# Outputs:   the message followed by a newline on stdout, or nothing
# Returns:   always 0. The `[[ ... ]] && echo` form returned 1 whenever
#            debugging was off, which made every disabled log call look
#            like a failed command.
#######################################
log() {
  if [[ "$debug" == true ]]; then
    # printf rather than echo, so a message starting with -n or -e is
    # printed verbatim instead of being parsed as an echo option.
    printf '%s\n' "$*"
  fi
}
# Path of the EPUB file to inspect: first positional argument, mandatory.
bookFile="${1:?ERROR: Please supply the path to an EPUB book file}"
log "bookFile: $bookFile"

# The XML document containing the metadata for a book is described in the
# file META-INF/container.xml.
#
# Notes on the jq filter:
#  * ."@full-path" is used instead of .["@full-path"] after a dot, because the
#    latter form is only accepted by jq 1.7 and later.
#  * When container.xml lists several <rootfile> elements, xq delivers them as
#    an array; the first one is used.
#  * `// empty` turns a missing attribute into an empty result rather than the
#    literal string "null".
metadataPath=$(
  unzip -q -c "$bookFile" META-INF/container.xml \
    | xq -r '
        .container.rootfiles.rootfile
        | if type == "array" then .[0] else . end
        | ."@full-path" // empty
      '
)
if [[ -z "$metadataPath" ]]; then
  # Best effort: report the problem on stderr but keep going, so the script
  # still prints its one-line result for this book.
  printf 'WARNING: no package document path found in META-INF/container.xml of %s\n' \
    "$bookFile" >&2
fi
log "metadataPath: $metadataPath"
# This path is also the only place I know of to get the top-level directory
# within the EPUB zip file; we need it later.
# `--` stops dirname from treating a path that starts with '-' as an option.
topLevelDir=$(dirname -- "$metadataPath")

# We need to handle this specially because, if there's no subdirectory in the
# zip file, we get '.' back here, but paths prefixed with './' are not valid
# for extracting with the unzip utility. Otherwise a trailing '/' is added so
# the value can be used directly as a prefix.
if [[ "$topLevelDir" == "." ]]; then
  topLevelDir=""
else
  topLevelDir="$topLevelDir/"
fi
log "topLevelDir: $topLevelDir"
# We're about to query several things from the metadata XML content, so let's
# put it in a variable.
metadataContents="$(unzip -q -c "$bookFile" "$metadataPath")"

# The location of cover images can depend on the version of EPUB we're dealing
# with, so get that info. `// empty` leaves epubVersion empty, rather than the
# literal string "null", when the package element has no version attribute.
epubVersion=$(xq -r '.package."@version" // empty' <<<"$metadataContents")
# Get the relative path within the book file of the cover image.
#
# jq helper definitions shared by every lookup below:
#   each      - stream the elements of a value that xq delivers as an array
#               (repeated XML element), as a single object (element occurring
#               once), or as null (element absent); non-object entries are
#               skipped so attribute lookups cannot fail on them
#   items     - every <item> of the manifest
#   metas     - every <meta> of the metadata section
#   pick_href - from an array of candidate items, return the href of the first
#               one with an image/* media type, falling back to the first
#               candidate; yields nothing if there is no candidate. This keeps
#               the result to a single path even if several items match.
jqDefs='
  def each:
    if type == "array" then .[] elif type == "object" then . else empty end
    | select(type == "object");
  def items: .package.manifest.item | each;
  def metas: .package.metadata.meta | each;
  def pick_href:
    (map(select((."@media-type" // "") | startswith("image/"))) + .)
    | first(.[] | ."@href" // empty);
'

case "$epubVersion" in
  2*)
    # Look for a manifest item whose id mentions "cover"...
    bookCoverRelPath=$(xq -r "$jqDefs"'
      [items | select((."@id" // "") | contains("cover"))] | pick_href
    ' <<<"$metadataContents")
    # ...and failing that, one whose href mentions "cover".
    if [[ -z "$bookCoverRelPath" ]]; then
      bookCoverRelPath=$(xq -r "$jqDefs"'
        [items | select((."@href" // "") | contains("cover"))] | pick_href
      ' <<<"$metadataContents")
    fi
    ;;
  3*)
    # <meta name="cover" content="ID"/> names the manifest item by its id.
    coverImageId=$(xq -r "$jqDefs"'
      first(metas | select(."@name" == "cover") | ."@content" // empty)
    ' <<<"$metadataContents")
    bookCoverRelPath=""
    if [[ -n "$coverImageId" ]]; then
      # The id is handed to jq with --arg instead of being pasted into the
      # program text, so quotes or backslashes in it cannot alter the filter.
      bookCoverRelPath=$(xq -r --arg id "$coverImageId" "$jqDefs"'
        [items | select(."@id" == $id)] | pick_href
      ' <<<"$metadataContents")
    fi
    # Fallback: EPUB 3 marks the cover with the "cover-image" value in the
    # item's space-separated properties attribute.
    if [[ -z "$bookCoverRelPath" ]]; then
      bookCoverRelPath=$(xq -r "$jqDefs"'
        [items
         | select((."@properties" // "") | split(" ") | any(. == "cover-image"))]
        | pick_href
      ' <<<"$metadataContents")
    fi
    ;;
  *)
    # Unknown or missing version: no cover lookup is attempted.
    bookCoverRelPath=""
    ;;
esac

log "epubVersion: $epubVersion"
log "bookCoverRelPath: $bookCoverRelPath"
# If we have a path, construct a full path within the EPUB zip to the book
# cover image file; otherwise use a placeholder that says there is none.
if [[ -z "$bookCoverRelPath" ]]; then
  bookCoverPath="--NO COVER IMAGE--"
else
  bookCoverPath="${topLevelDir}${bookCoverRelPath}"
fi

# Now do whatever you need to with it
printf '%s\n' "$bookFile [ $bookCoverPath ]"

# Send the image to magick display to view it...
# display <(unzip -q -c "$bookFile" "$bookCoverPath")

# ...or extract the image file with full path into .
# unzip -q "$bookFile" "$bookCoverPath"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment