#!/bin/bash
#
# Splits every PDF with more than 3000 pages that sits directly inside the
# given directory into four parts, then removes the original file.
#
# Usage: <script> directory_path

# Abort on the first unhandled command failure.
set -e

# Exactly one argument (the directory to scan) is required.
if [[ $# -ne 1 ]]; then
  echo "Usage: $0 directory_path" >&2
  exit 1
fi

DIR_PATH=$1

# Check if directory exists
if [[ ! -d "$DIR_PATH" ]]; then
  echo "Error: Directory '$DIR_PATH' does not exist" >&2
  exit 1
fi

#######################################
# Split one PDF into four roughly equal parts when it has more than
# 3000 pages.
#
# The parts are written next to the input as <name>_part1.pdf through
# <name>_part4.pdf using Ghostscript. The original file is removed only
# after every part has been written successfully; if any part fails, the
# parts produced by this call are deleted and the original is left in place.
#
# Arguments:
#   $1 - path to the PDF file
# Outputs:
#   Progress messages on stdout; errors and warnings on stderr.
# Returns:
#   0 if the file was split or did not need splitting, 1 on any error.
#######################################
split_pdf() {
  local input=$1
  local -r max_pages=3000 num_parts=4

  # First check if the file exists and is readable
  if [[ ! -r "$input" ]]; then
    echo "Error: Cannot read file '$input'" >&2
    return 1
  fi

  # Get page count, preferring qpdf and falling back to pdfinfo. A failing
  # tool yields an empty count, which is reported by the validation below.
  local total=""
  if command -v qpdf &> /dev/null; then
    total=$(qpdf --show-npages "$input") || total=""
  elif command -v pdfinfo &> /dev/null; then
    # Anchor on the "Pages:" field so that a Title/Subject line containing
    # the word "Pages" cannot be picked up instead.
    total=$(pdfinfo "$input" | awk '/^Pages:/ {print $2}') || total=""
  else
    echo "Error: Neither qpdf nor pdfinfo is available" >&2
    return 1
  fi

  # Accept only a positive decimal integer; 10# keeps leading zeros from
  # being read as octal.
  if [[ ! "$total" =~ ^[0-9]+$ ]] || (( 10#$total == 0 )); then
    echo "Error: Could not determine page count for '$input'" >&2
    return 1
  fi
  total=$(( 10#$total ))

  echo "Processing: $input"
  echo "Total pages: $total"

  # Check if PDF has more than 3000 pages
  if (( total <= max_pages )); then
    echo "Skipping: $input (has $total pages, not more than $max_pages)"
    return 0
  fi

  # Ghostscript is only needed once we know a split is required.
  if ! command -v gs &> /dev/null; then
    echo "Error: gs (Ghostscript) is not available" >&2
    return 1
  fi

  # Ceiling division so that the last part absorbs any remainder.
  local pages_per_part=$(( (total + num_parts - 1) / num_parts ))
  echo "Pages per part: $pages_per_part"

  local stem=${input%.pdf}
  local -a created=()
  local part first last i

  # Split the PDF, verifying each part as it is produced.
  for (( i = 1; i <= num_parts; i++ )); do
    part="${stem}_part${i}.pdf"
    first=$(( (i - 1) * pages_per_part + 1 ))
    last=$(( i * pages_per_part ))
    if (( last > total )); then
      last=$total
    fi

    # A part counts as created only if gs exited successfully AND left a
    # non-empty file behind; a stale or truncated file is not enough.
    if gs -dBATCH -dNOPAUSE -q -sDEVICE=pdfwrite \
        -dFirstPage="$first" -dLastPage="$last" \
        -sOutputFile="$part" "$input" && [[ -s "$part" ]]; then
      created+=("$part")
      echo "Created: $(realpath "$part")"
    else
      echo "Failed to create: $part" >&2
      # Drop the incomplete output so no partial set of parts is left over.
      rm -f -- "$part" "${created[@]}"
      echo "Warning: Not removing original file due to split errors" >&2
      return 1
    fi
  done

  # Remove original only now that all parts were created successfully
  echo "Removing original file: $input"
  if ! rm -- "$input"; then
    echo "Error: Could not remove original file '$input'" >&2
    return 1
  fi
  echo "Successfully split $input into $num_parts parts and removed the original"
}

# Process all PDFs in the directory.
#
# Each file is handled in its own subshell with errexit switched on inside
# it, so a failure while splitting one PDF stops work on that file only and
# the loop carries on with the next one. Errexit is switched off around the
# subshell (rather than using `if`/`||`) because bash ignores `set -e` for
# everything executed inside a tested command.
echo "Looking for PDFs in: $DIR_PATH"

count=0
failed=0
for pdf in "$DIR_PATH"/*.pdf; do
  # An unmatched glob stays literal; -f also skips directories named *.pdf.
  [[ -f "$pdf" ]] || continue

  set +e
  (
    set -e
    split_pdf "$pdf"
  )
  status=$?
  set -e

  count=$((count + 1))
  if [[ $status -ne 0 ]]; then
    failed=$((failed + 1))
    echo "Warning: Failed to process '$pdf'" >&2
  fi
done

if [[ $count -eq 0 ]]; then
  echo "No PDF files found in $DIR_PATH"
else
  echo "Processed $count PDF files"
fi

# Surface per-file failures through the exit status of the whole run.
if [[ $failed -gt 0 ]]; then
  echo "Error: $failed PDF file(s) could not be processed" >&2
  exit 1
fi