Skip to content

Instantly share code, notes, and snippets.

@dino-
Created May 11, 2025 15:21
Show Gist options
  • Save dino-/1d8b9013ae98bc7ff2b7204a40c0f2f8 to your computer and use it in GitHub Desktop.
Save dino-/1d8b9013ae98bc7ff2b7204a40c0f2f8 to your computer and use it in GitHub Desktop.
Script for extracting the cover art image from EPUB2/3 files
#! /usr/bin/env bash
# Note: this script requires the following tools to be available:
#
# program   package (Arch Linux)
# ----------------------------------------------------------------
# display   imagemagick (optional, if you want to view the image)
# unzip     unzip
# xq        yq
# Set to true to trace the intermediate values the script works out.
debug=false
# debug=true

# Print the arguments, joined by spaces, as one diagnostic line when
# $debug is "true"; otherwise do nothing.
# Globals:   debug (read)
# Arguments: the words of the message
# Outputs:   the message on stderr, so it never mixes with the result line
#            the script prints on stdout
# Returns:   0 in both cases, so a silent call cannot abort a caller running
#            under `set -e` or leak a failure status as a function result
log () {
  if [[ "$debug" == true ]]; then
    printf '%s\n' "$*" >&2
  fi
}
# Path of the EPUB file to inspect; abort with a usage message when missing.
bookFile="${1:?ERROR: Please supply the path to an EPUB book file}"
if [[ ! -r "$bookFile" ]]; then
  echo "ERROR: cannot read EPUB file: $bookFile" >&2
  exit 1
fi
log "bookFile: $bookFile"

# The XML document containing the metadata for a book is described in the
# file META-INF/container.xml.
# - xq turns a repeated <rootfile> element into an array, so take the first
#   entry in that case.
# - The attribute is addressed as ."@full-path" because the `.["..."]` form
#   after a dot is only accepted by newer jq releases.
# - `// empty` turns a missing attribute into empty output instead of the
#   literal string "null".
metadataPath=$(
  unzip -q -c "$bookFile" META-INF/container.xml \
    | xq -r '
        .container.rootfiles.rootfile
        | if type == "array" then .[0] else . end
        | ."@full-path" // empty
      '
)
if [[ -z "$metadataPath" ]]; then
  echo "ERROR: no package document found via META-INF/container.xml in $bookFile" >&2
  exit 1
fi
log "metadataPath: $metadataPath"
# The metadata path is the only known source for the top-level directory
# inside the EPUB zip file; the cover path is built from it later on.
topLevelDir=$(dirname "$metadataPath")

# dirname reports '.' when the metadata file sits at the root of the zip.
# Member names prefixed with './' are not valid for extracting with unzip,
# so that case becomes an empty prefix; any real directory gets a trailing
# slash so it can be glued directly in front of a relative path.
case "$topLevelDir" in
  .) topLevelDir="" ;;
  *) topLevelDir="${topLevelDir}/" ;;
esac
log "topLevelDir: $topLevelDir"
# Several things are queried from the metadata XML content, so read it from
# the zip once and keep it in a variable.
metadataContents=$(unzip -q -c "$bookFile" "$metadataPath")
if [[ -z "$metadataContents" ]]; then
  echo "ERROR: cannot read $metadataPath from $bookFile" >&2
  exit 1
fi

# EPUB version declared on the <package> element (used for diagnostics only).
epubVersion=$(xq -r '.package."@version"' <<<"$metadataContents")

# Get the relative path within the book file of the cover image.
#
# The lookup runs as one jq program so that no shell value has to be spliced
# into the filter text. Candidates are tried from most to least explicit and
# the first href found wins:
#   1. manifest item whose "properties" list contains "cover-image" (EPUB 3)
#   2. manifest item whose id is named by <meta name="cover" content="...">
#   3. image item whose id contains "cover"
#   4. image item whose href contains "cover"
# The fuzzy matches (3, 4) are limited to image/* media types so an XHTML
# cover page is not mistaken for the picture. as_list is needed because xq
# yields a bare object, not an array, when an element occurs only once.
# Nothing is printed when no candidate exists, leaving the variable empty.
bookCoverRelPath=$(xq -r '
  def as_list: if type == "array" then . elif . == null then [] else [.] end;

  (.package.manifest.item | as_list | map(select(type == "object"))) as $items
  | ($items | map(select((."@media-type" // "") | startswith("image/")))) as $images
  | ([ .package.metadata.meta | as_list | .[]
       | select(type == "object" and ."@name" == "cover")
       | ."@content" ] | first) as $coverId
  | [ ($items[]  | select((."@properties" // "") | split(" ") | index("cover-image"))),
      ($items[]  | select($coverId != null and ."@id" == $coverId)),
      ($images[] | select((."@id" // "") | contains("cover"))),
      ($images[] | select((."@href" // "") | contains("cover")))
    ]
  | map(."@href" | select(type == "string"))
  | first // empty
' <<<"$metadataContents")

log "epubVersion: $epubVersion"
log "bookCoverRelPath: $bookCoverRelPath"
# Build the full path of the cover image inside the EPUB zip by prefixing the
# relative path with the top-level directory. When no relative path was
# found, a placeholder marker is used instead.
if [[ -n "$bookCoverRelPath" ]]; then
  bookCoverPath="${topLevelDir}${bookCoverRelPath}"
else
  bookCoverPath="--NO COVER IMAGE--"
fi

# Report the result; replace or extend this with whatever you need.
printf '%s [ %s ]\n' "$bookFile" "$bookCoverPath"

# Send the image to magick display to view it...
# display <(unzip -q -c "$bookFile" "$bookCoverPath")

# ...or extract the image file with full path into .
# unzip -q "$bookFile" "$bookCoverPath"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment