Created
May 27, 2021 18:06
-
-
Save kaushalvivek/ae97d94c2583721f8128e0be80b78f48 to your computer and use it in GitHub Desktop.
Update ContentType for all objects in an S3 bucket based on file extension.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# imports | |
import boto3 | |
import json | |
# configuration
# NOTE: the key pair below must belong to a user with write access to the
# bucket named in BUCKET_NAME
BUCKET_NAME = '<>'
AWS_ACCESS_KEY_ID = '<>'
AWS_SECRET_ACCESS_KEY = '<>'

# the resource and the client are both built from the same key pair
_credentials = {
    'aws_access_key_id': AWS_ACCESS_KEY_ID,
    'aws_secret_access_key': AWS_SECRET_ACCESS_KEY,
}
s3_resource = boto3.resource('s3', **_credentials)
s3_client = boto3.client('s3', **_credentials)

# handle used by the driver loop to enumerate every object in the bucket
bucket = s3_resource.Bucket(name=BUCKET_NAME)
class ContentType:
    """Derive a MIME content type from the file extension of an object key."""

    # add any other content types you want to this dict
    # keys must be lower-case and include the leading dot: lookup compares
    # them against the lower-cased *end* of the object key
    content_types = {
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.png': 'image/png',
        '.pdf': 'application/pdf',
        # .docx is Office Open XML, which has its own registered media type
        # distinct from the legacy binary .doc format
        '.docx': 'application/vnd.openxmlformats-officedocument'
                 '.wordprocessingml.document',
        '.doc': 'application/msword',
        '.mp4': 'video/mp4',
    }

    def get_content_type_from_name(self, name):
        """Return the content type matching the extension of ``name``.

        Args:
            name: Object key (or file name) to inspect.

        Returns:
            The mapped content type string, or ``None`` when ``name`` is
            empty or does not end with a supported extension.
        """
        if not name:
            return None

        # Compare against the end of the key, case-insensitively. A plain
        # substring test would also match dots in directory names
        # ("my.docs/readme.txt") or inner extensions ("cat.jpg.bak").
        lowered = name.lower()
        for ext, content_type in ContentType.content_types.items():
            if lowered.endswith(ext):
                return content_type
        return None


# shared instance used by the driver loop at the bottom of the script
instance = ContentType()
def update_metadata_for_object(obj, etag, content_type):
    """Copy an object onto itself in BUCKET_NAME with new metadata.

    Args:
        obj: Object whose ``key`` attribute names the S3 key to rewrite.
        etag: ETag passed as ``CopySourceIfMatch`` for the copy request.
        content_type: Value to store as the object's ContentType.

    Returns:
        None.
    """
    object_key = obj.key
    copy_arguments = {
        "Key": object_key,
        "Bucket": BUCKET_NAME,
        # source and destination are the same key: an in-place rewrite
        "CopySource": {"Bucket": BUCKET_NAME, "Key": object_key},
        "ContentType": content_type,
        "ContentDisposition": 'inline',
        "ACL": 'public-read',  # makes the object publicly readable
        "CopySourceIfMatch": etag,
        # REPLACE so the metadata supplied in this request is the one stored
        "MetadataDirective": "REPLACE",
    }
    s3_client.copy_object(**copy_arguments)
def fetch_etag(obj):
    """Return the ``ETag`` field of a head_object response for ``obj.key``.

    Args:
        obj: Object whose ``key`` attribute names the S3 key in BUCKET_NAME.
    """
    head_response = s3_client.head_object(Key=obj.key, Bucket=BUCKET_NAME)
    return head_response['ETag']
# Driver: visit every object in the bucket and rewrite its ContentType when
# the key's extension is one that ContentType knows about.
print("Initiating metadata update ...")
for i, summary in enumerate(bucket.objects.all()):
    # the ETag is fetched up front and later passed to the copy request
    etag = fetch_etag(summary)
    print(f"Updating file #{i+1} -- ETag : {etag}")
    s3_object = s3_resource.Object(BUCKET_NAME, summary.key)
    detected_type = instance.get_content_type_from_name(s3_object.key)
    if detected_type:
        update_metadata_for_object(s3_object, etag, detected_type)
    else:
        # unknown extension: leave the object untouched
        print("Extension not supported.")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment