Skip to content

Instantly share code, notes, and snippets.

@onurkose
Last active May 21, 2025 10:21
Show Gist options
  • Save onurkose/69daeab61a352d17d4039f6710d65e64 to your computer and use it in GitHub Desktop.
Save onurkose/69daeab61a352d17d4039f6710d65e64 to your computer and use it in GitHub Desktop.
#!/bin/bash
# Mixpanel batch import script
# It allows you to import data from one Mixpanel project to another by
# POSTing the events to the Mixpanel /import endpoint in batches.
#
# Usage: ./mixpanel_batch_import.sh input_file.json project_id api_secret
#
# Arguments:
#   input_file  File containing one JSON event per line. Blank lines are
#               skipped; a final line without a trailing newline is included.
#   project_id  Mixpanel project ID, sent as the project_id query parameter.
#   api_secret  Base64 encoded string generated using your Mixpanel
#               Username&Secret; sent as the HTTP Basic authorization value.
#
# Output: progress and a final summary on stdout; usage and fatal errors on
# stderr. Exits 1 on bad arguments or setup failure, 0 once the run has
# completed (per-batch failures are reported in the summary).

# Fail on unset variables and on errors inside pipelines. `set -e` is left
# off on purpose: a failed batch must be counted, not abort the whole import.
set -uo pipefail

if [ "$#" -lt 3 ]; then
  echo "Usage: $0 <input_file.{json/txt}> <project_id> <api_secret>" >&2
  exit 1
fi
INPUT_FILE=$1
PROJECT_ID=$2
API_SECRET=$3

# Check if the input file exists
if [ ! -f "$INPUT_FILE" ]; then
  echo "Error: Input file $INPUT_FILE not found" >&2
  exit 1
fi

# Count total number of events in input file (one event per non-blank line).
# grep -c, unlike wc -l, also counts a last line that has no trailing newline
# and ignores blank lines, which would otherwise become empty array elements.
TOTAL_EVENTS=$(grep -c -e '[^[:space:]]' -- "$INPUT_FILE")
TOTAL_EVENTS=${TOTAL_EVENTS:-0}
echo "Total events in file: $TOTAL_EVENTS"
if [ "$TOTAL_EVENTS" -eq 0 ]; then
  # Stop here: with zero batches the progress calculation would divide by 0.
  echo "No events found in $INPUT_FILE; nothing to import."
  exit 0
fi

# Batch size - Mixpanel allows 2000 events per request
readonly BATCH_SIZE=2000
TOTAL_BATCHES=$(( (TOTAL_EVENTS + BATCH_SIZE - 1) / BATCH_SIZE ))
echo "Will process in $TOTAL_BATCHES batches of $BATCH_SIZE events each"

# Create a temporary directory for batch files
TEMP_DIR=$(mktemp -d) || {
  echo "Error: could not create temporary directory" >&2
  exit 1
}

# Remove the temporary directory (it holds a copy of the event data).
cleanup() {
  rm -rf -- "${TEMP_DIR:?}"
}
# Clean up on every exit path, including Ctrl-C and termination.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM
echo "Using temporary directory: $TEMP_DIR"

# Split the input file into batches
echo "Splitting input file into batches..."
# Only non-blank lines are passed on, so the batches match TOTAL_EVENTS.
# -a 5 widens the file-name suffix: the default of two letters allows only
# 676 batch files on split implementations that do not extend it themselves.
if ! grep -e '[^[:space:]]' -- "$INPUT_FILE" \
  | split -a 5 -l "$BATCH_SIZE" - "$TEMP_DIR/batch_"; then
  echo "Error: failed to split $INPUT_FILE into batches" >&2
  exit 1
fi

# Process each batch
BATCH_COUNT=0
SUCCESSFUL_EVENTS=0
FAILED_EVENTS=0
for BATCH_FILE in "$TEMP_DIR"/batch_*; do
  # Guard against the glob matching nothing (pattern would stay literal).
  [ -e "$BATCH_FILE" ] || continue
  BATCH_COUNT=$((BATCH_COUNT + 1))

  # Count events in this batch; $(( )) strips any whitespace padding that
  # wc may print around the number.
  BATCH_EVENTS=$(( $(wc -l < "$BATCH_FILE") ))
  echo "Processing batch $BATCH_COUNT/$TOTAL_BATCHES ($BATCH_EVENTS events)..."

  # Make sure we have proper JSON array format for the batch:
  # wrap the lines in square brackets and add commas between events
  # (the sed expression appends "," to every line except the last).
  TMP_JSON="$TEMP_DIR/request_$BATCH_COUNT.json"
  {
    echo "["
    sed -e '$! s/$/,/' "$BATCH_FILE"
    echo "]"
  } > "$TMP_JSON"

  # Submit the batch to Mixpanel and parse the response.
  STATUS=""
  IMPORTED=0
  if ! RESPONSE=$(curl --silent --show-error --request POST \
    --url "https://api.mixpanel.com/import?strict=1&project_id=$PROJECT_ID" \
    --header 'Content-Type: application/json' \
    --header 'accept: application/json' \
    --header "authorization: Basic $API_SECRET" \
    --data-binary @"$TMP_JSON"); then
    # Transport-level failure (DNS, TLS, connection...): nothing to parse.
    RESPONSE="curl could not complete the request${RESPONSE:+: $RESPONSE}"
  elif command -v jq > /dev/null 2>&1; then
    # jq errors on a non-JSON body are silenced deliberately: the raw
    # response is printed below whenever the batch is not fully imported.
    STATUS=$(jq -r '.status // ""' <<< "$RESPONSE" 2> /dev/null)
    IMPORTED=$(jq -r '.num_records_imported // 0' <<< "$RESPONSE" 2> /dev/null)
  else
    # Fallback without jq; tolerates optional whitespace around the colon.
    STATUS=$(grep -o '"status"[[:space:]]*:[[:space:]]*"[^"]*"' <<< "$RESPONSE" \
      | head -n 1 | cut -d'"' -f4)
    IMPORTED=$(grep -o '"num_records_imported"[[:space:]]*:[[:space:]]*[0-9][0-9]*' <<< "$RESPONSE" \
      | head -n 1 | grep -o '[0-9][0-9]*$')
  fi
  # Anything that is not a plain non-negative integer counts as 0, so the
  # numeric comparisons below never see an empty or malformed value.
  [[ "$IMPORTED" =~ ^[0-9]+$ ]] || IMPORTED=0

  # Check response and update counters
  if [ "$STATUS" = "OK" ]; then
    echo "SUCCESS: $IMPORTED events imported"
    SUCCESSFUL_EVENTS=$((SUCCESSFUL_EVENTS + IMPORTED))
  elif [ "$IMPORTED" -gt 0 ]; then
    # A non-OK status with a positive import count means only part of the
    # batch was accepted: count the remainder as failed instead of losing it.
    BATCH_REJECTED=$((BATCH_EVENTS - IMPORTED))
    if [ "$BATCH_REJECTED" -lt 0 ]; then
      BATCH_REJECTED=0
    fi
    echo "SUCCESS: $IMPORTED events imported"
    echo "FAILED: $BATCH_REJECTED events in this batch were not imported: $RESPONSE"
    SUCCESSFUL_EVENTS=$((SUCCESSFUL_EVENTS + IMPORTED))
    FAILED_EVENTS=$((FAILED_EVENTS + BATCH_REJECTED))
  else
    echo "FAILED: $RESPONSE"
    FAILED_EVENTS=$((FAILED_EVENTS + BATCH_EVENTS))
  fi

  # This batch's files are no longer needed; free the disk space right away.
  rm -f -- "$BATCH_FILE" "$TMP_JSON"

  # Add a small delay to avoid rate limiting
  sleep 0.5

  # Progress report (TOTAL_BATCHES is at least 1 here, see the early exit).
  PERCENT_COMPLETE=$(( (BATCH_COUNT * 100) / TOTAL_BATCHES ))
  echo "Progress: $PERCENT_COMPLETE% complete ($SUCCESSFUL_EVENTS successful, $FAILED_EVENTS failed)"
done

# Clean up
echo "Cleaning up temporary files..."
cleanup
echo "Import completed!"
echo "Total events processed: $TOTAL_EVENTS"
echo "Successful events: $SUCCESSFUL_EVENTS"
echo "Failed events: $FAILED_EVENTS"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment