Created
January 17, 2024 19:37
-
-
Save vlio20/477908c6951dfce75f1ac01768261b65 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
from concurrent.futures import ThreadPoolExecutor

import boto3
# Global variables: script configuration passed to download_files_from_s3
# by the __main__ entry point.
S3_BUCKET_NAME = 'yyyyy'  # bucket to download from
S3_PREFIX = 'xxxxl'  # key prefix used to filter the object listing
LOCAL_DOWNLOAD_DIRECTORY = './dest'  # local directory that receives the files
NUM_THREADS = 100  # size of the download thread pool
def download_file(bucket_name, key, local_directory, s3_client=None):
    """Download a single S3 object into ``local_directory``.

    The file is stored under the last path component of ``key``, so objects
    whose keys share a basename overwrite one another. Errors are printed
    instead of raised so that one failing object does not abort a batch.

    Args:
        bucket_name: Name of the bucket that holds the object.
        key: Full key of the object to download.
        local_directory: Directory that receives the file; it must exist.
        s3_client: Optional S3 client to reuse. When omitted, a new client
            is created for this call.
    """
    file_name = os.path.basename(key)
    if not file_name:
        # Keys that are empty or end in "/" have no file name; joining them
        # would yield the directory itself and the download would fail.
        print(f"Skipping {key}: key has no file name")
        return

    local_file_path = os.path.join(local_directory, file_name)
    try:
        # Client creation is inside the try so that a failure here is
        # reported rather than lost inside an unread future.
        if s3_client is None:
            s3_client = boto3.client('s3')
        s3_client.download_file(bucket_name, key, local_file_path)
        print(f"Downloaded: {key} to {local_file_path}")
    except Exception as e:
        print(f"Error downloading {key}: {e}")


def download_files_from_s3(bucket_name, prefix, local_directory, num_threads=10):
    """Download every object under ``prefix`` into ``local_directory``.

    The listing is paginated and each page of keys is downloaded in
    parallel on a shared thread pool.

    Args:
        bucket_name: Name of the bucket to read from.
        prefix: Only keys starting with this prefix are downloaded.
        local_directory: Destination directory; created if missing.
        num_threads: Number of worker threads used for downloads.
    """
    # One client is shared by all workers. Its connection pool is sized to
    # the worker count (the default pool is smaller than a large
    # ``num_threads``), so threads do not churn connections.
    client_config = boto3.session.Config(
        max_pool_connections=max(10, num_threads),
    )
    s3 = boto3.client('s3', config=client_config)

    # Ensure the local download directory exists (no check-then-create race).
    os.makedirs(local_directory, exist_ok=True)

    paginator = s3.get_paginator('list_objects_v2')
    with ThreadPoolExecutor(max_workers=num_threads) as executor:
        for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix):
            pending = [
                executor.submit(
                    download_file, bucket_name, obj['Key'], local_directory, s3
                )
                for obj in page.get('Contents', [])
            ]
            # Finish the current page before listing the next one so the
            # work queue never holds more than one page of keys.
            for future in pending:
                future.result()
if __name__ == "__main__":
    # Script entry point: download everything under S3_PREFIX from
    # S3_BUCKET_NAME into LOCAL_DOWNLOAD_DIRECTORY using NUM_THREADS workers.
    download_files_from_s3(
        S3_BUCKET_NAME,
        S3_PREFIX,
        LOCAL_DOWNLOAD_DIRECTORY,
        NUM_THREADS,
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment