Skip to content

Instantly share code, notes, and snippets.

@BugenZhao
Created January 26, 2023 06:53
Show Gist options
  • Save BugenZhao/1d5d0ee1fac86e25d20190dda3ccfb00 to your computer and use it in GitHub Desktop.
Save BugenZhao/1d5d0ee1fac86e25d20190dda3ccfb00 to your computer and use it in GitHub Desktop.
Extract PDF from Dark Horse Digital
from PIL import Image # pip3 install pillow
import tarfile
import json
# Suppose the book is at `https://digital.darkhorse.com/read/{hex}`,
# then visit `https://digital.darkhorse.com/api/v6/book/{hex}` to get the
# tarball, and save it as `book.tar` in the current working directory.

# Keep the archive open until the PDF has been written: Pillow's `Image.open`
# is lazy, so each page still reads its bytes from the tarball during `save`.
with tarfile.open('book.tar', mode='r') as book:
    # The manifest lists every page along with the archive member that holds
    # its image (`src_image`).
    manifest = json.load(book.extractfile('manifest.json'))
    pages = [page['src_image'] for page in manifest['pages']]
    print(f'Resolved {len(pages)} pages')

    jpg_images = [Image.open(book.extractfile(page)) for page in pages]
    print(f'Extracted {len(jpg_images)} pages')

    print('Converting to PDF...')
    # With no pages there is no first image to save from, so nothing is
    # written in that case.
    if jpg_images:
        # The first image drives the save; the rest are appended as further
        # pages of the same PDF.
        jpg_images[0].save('book.pdf', save_all=True, append_images=jpg_images[1:])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment