Last active
May 21, 2025 10:21
-
-
Save onurkose/69daeab61a352d17d4039f6710d65e64 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Mixpanel batch import script
# It allows you to import data from one Mixpanel project to another
#
# Usage: ./mixpanel_batch_import.sh input_file.json project_id api_secret
#
#   input_file  File with one JSON event object per line. Blank lines are
#               skipped; a missing trailing newline is tolerated.
#   project_id  Sent as the project_id query parameter of the import request.
#   api_secret  Base64 encoded string generated using your Mixpanel
#               Username&Secret; sent verbatim as "authorization: Basic ...".
#
# Responses are parsed with jq when it is installed, otherwise with grep.
# A summary of imported / failed event counts is printed at the end.

set -u -o pipefail

if [ "$#" -lt 3 ]; then
  echo "Usage: $0 <input_file.{json/txt}> <project_id> <api_secret>" >&2
  exit 1
fi

INPUT_FILE=$1
PROJECT_ID=$2
API_SECRET=$3

# Check if the input file exists
if [ ! -f "$INPUT_FILE" ]; then
  echo "Error: Input file $INPUT_FILE not found" >&2
  exit 1
fi

# Count events: one event per non-blank line. awk (unlike `wc -l`) also counts
# a final line that has no trailing newline, and blank lines are excluded
# because they would otherwise turn into bare commas in the JSON array.
TOTAL_EVENTS=$(LC_ALL=C awk '$0 !~ /^[ \t\r]*$/ { n++ } END { print n + 0 }' "$INPUT_FILE")
TOTAL_EVENTS=$((TOTAL_EVENTS + 0))
echo "Total events in file: $TOTAL_EVENTS"

# Nothing to send; stopping here also keeps the progress division below safe.
if [ "$TOTAL_EVENTS" -eq 0 ]; then
  echo "No events found in $INPUT_FILE, nothing to import"
  exit 0
fi

# Batch size - Mixpanel allows 2000 events per request
BATCH_SIZE=2000
TOTAL_BATCHES=$(((TOTAL_EVENTS + BATCH_SIZE - 1) / BATCH_SIZE))
echo "Will process in $TOTAL_BATCHES batches of $BATCH_SIZE events each"

# Create a temporary directory for batch files and make sure it is removed
# on every exit path, including Ctrl-C.
TEMP_DIR=$(mktemp -d) || {
  echo "Error: could not create temporary directory" >&2
  exit 1
}
cleanup() {
  rm -rf -- "${TEMP_DIR:?}"
}
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM
echo "Using temporary directory: $TEMP_DIR"

# Split the input file into batches.
# -a 6: split's default two-letter suffix allows only 676 output files
# (~1.35M events at this batch size); six letters removes that ceiling while
# keeping the glob below in the original line order.
echo "Splitting input file into batches..."
if ! LC_ALL=C awk '$0 !~ /^[ \t\r]*$/' "$INPUT_FILE" |
  split -a 6 -l "$BATCH_SIZE" - "$TEMP_DIR/batch_"; then
  echo "Error: failed to split $INPUT_FILE into batches" >&2
  exit 1
fi

# Decide once how responses will be parsed.
if command -v jq > /dev/null 2>&1; then
  HAVE_JQ=1
else
  HAVE_JQ=0
fi

# Process each batch
BATCH_COUNT=0
SUCCESSFUL_EVENTS=0
FAILED_EVENTS=0

for BATCH_FILE in "$TEMP_DIR"/batch_*; do
  # Guard against the unexpanded glob pattern if no batch file exists.
  [ -f "$BATCH_FILE" ] || continue
  BATCH_COUNT=$((BATCH_COUNT + 1))

  # Count events in this batch ($(( )) strips the padding some wc builds emit)
  BATCH_EVENTS=$(($(wc -l < "$BATCH_FILE")))
  echo "Processing batch $BATCH_COUNT/$TOTAL_BATCHES ($BATCH_EVENTS events)..."

  # Build a JSON array: wrap the lines in square brackets and append a comma
  # to every line except the last one.
  TMP_JSON="$TEMP_DIR/request_$BATCH_COUNT.json"
  {
    echo "["
    sed -e '$! s/$/,/' "$BATCH_FILE"
    echo "]"
  } > "$TMP_JSON"

  # Submit the batch to Mixpanel. --data-binary sends the file unmodified.
  RESPONSE=$(curl --silent --show-error --request POST \
    --url "https://api.mixpanel.com/import?strict=1&project_id=$PROJECT_ID" \
    --header 'Content-Type: application/json' \
    --header 'accept: application/json' \
    --header "authorization: Basic $API_SECRET" \
    --data-binary @"$TMP_JSON")
  CURL_STATUS=$?

  # The batch files are no longer needed; remove them now so disk usage
  # stays bounded on large imports.
  rm -f -- "$TMP_JSON" "$BATCH_FILE"

  # Parse the response using jq if available, otherwise use grep
  STATUS=""
  IMPORTED=0
  if [ "$CURL_STATUS" -eq 0 ]; then
    if [ "$HAVE_JQ" -eq 1 ]; then
      # jq errors (e.g. a non-JSON body) are silenced; defaults apply below.
      STATUS=$(printf '%s' "$RESPONSE" | jq -r '.status // ""' 2> /dev/null)
      IMPORTED=$(printf '%s' "$RESPONSE" | jq -r '.num_records_imported // 0' 2> /dev/null)
    else
      STATUS=$(printf '%s' "$RESPONSE" |
        grep -o '"status"[[:space:]]*:[[:space:]]*"[^"]*"' | head -n 1 | cut -d'"' -f4)
      IMPORTED=$(printf '%s' "$RESPONSE" |
        grep -o '"num_records_imported"[[:space:]]*:[[:space:]]*[0-9]*' | head -n 1 |
        grep -o '[0-9]*$')
    fi
  fi

  # Anything that is not a plain non-negative integer counts as 0 so the
  # numeric comparisons below can never fail with a syntax error.
  case "$IMPORTED" in
    '' | *[!0-9]*) IMPORTED=0 ;;
  esac
  IMPORTED=$((10#$IMPORTED))

  # Check response and update counters
  if [ "$IMPORTED" -ge "$BATCH_EVENTS" ]; then
    echo "SUCCESS: $IMPORTED events imported"
    SUCCESSFUL_EVENTS=$((SUCCESSFUL_EVENTS + IMPORTED))
  elif [ "$STATUS" = "OK" ] || [ "$IMPORTED" -gt 0 ]; then
    # Fewer records were reported as imported than were sent: count the
    # remainder as failed and show the response so the reason is visible.
    REJECTED=$((BATCH_EVENTS - IMPORTED))
    echo "SUCCESS: $IMPORTED events imported"
    echo "WARNING: $REJECTED events in this batch were not imported: $RESPONSE" >&2
    SUCCESSFUL_EVENTS=$((SUCCESSFUL_EVENTS + IMPORTED))
    FAILED_EVENTS=$((FAILED_EVENTS + REJECTED))
  else
    if [ "$CURL_STATUS" -ne 0 ]; then
      echo "FAILED: curl exited with status $CURL_STATUS" >&2
    else
      echo "FAILED: $RESPONSE" >&2
    fi
    FAILED_EVENTS=$((FAILED_EVENTS + BATCH_EVENTS))
  fi

  # Add a small delay to avoid rate limiting
  sleep 0.5

  # Progress report
  PERCENT_COMPLETE=$(((BATCH_COUNT * 100) / TOTAL_BATCHES))
  echo "Progress: $PERCENT_COMPLETE% complete ($SUCCESSFUL_EVENTS successful, $FAILED_EVENTS failed)"
done

# Clean up (the EXIT trap repeats this harmlessly)
echo "Cleaning up temporary files..."
cleanup

echo "Import completed!"
echo "Total events processed: $TOTAL_EVENTS"
echo "Successful events: $SUCCESSFUL_EVENTS"
echo "Failed events: $FAILED_EVENTS"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment