Last active
February 7, 2025 13:41
-
-
Save mikeller/7034d99bc27c361fc6a2df84e19c36ff to your computer and use it in GitHub Desktop.
gitlab_remove_expired_artifacts_by_group.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import time | |
import requests | |
import sys | |
from datetime import datetime, timezone | |
from dateutil import parser | |
import re | |
def print_stderr(*args, **kwargs):
    """Write a message to stderr, mirroring the built-in print() signature."""
    print(*args, **kwargs, file=sys.stderr)
# --- Command-line arguments ----------------------------------------------
# Usage: <server> <token> <group id>
if len(sys.argv) != 4:
    print(f'Usage: {sys.argv[0]} <server> <token> <group id>')
    # sys.exit() is the correct way to terminate a script; the bare exit()
    # builtin is an interactive-session helper injected by the site module
    # and is not guaranteed to exist (e.g. under python -S or frozen apps).
    sys.exit(1)

server = sys.argv[1]    # GitLab host name, e.g. 'gitlab.example.com'
token = sys.argv[2]     # private/personal access token with API scope
group_id = sys.argv[3]  # numeric ID of the GitLab group to clean up
# Timestamp used to decide whether an artifact's expiry lies in the past.
now = datetime.now(timezone.utc)

# Collect the ids of all projects whose direct namespace is the target group.
# NOTE: the GitLab API silently clamps per_page to a maximum of 100, so the
# original single request with per_page=500 would drop projects in groups
# with more than 100 of them. Follow the 'next' pagination links instead,
# with the same 500/429 retry policy used by the other loops in this script.
projects_url = f"https://{server}/api/v4/groups/{group_id}/projects?per_page=100&page=1"
project_ids = []
while projects_url:
    response = requests.get(
        projects_url,
        headers={
            'private-token': token,
        },
    )
    # Transient server error or rate limiting: wait, then retry the same page.
    if response.status_code in [500, 429]:
        print_stderr(f"Status {response.status_code}, retrying.")
        time.sleep(10)
        continue
    response.raise_for_status()
    for project in response.json():
        # The endpoint can also list projects of subgroups; keep only projects
        # owned directly by the requested group.
        if int(project['namespace']['id']) == int(group_id):
            project_ids.append(int(project['id']))
    projects_url = response.links.get('next', {}).get('url', None)

print_stderr(f'Number of projects found: {len(project_ids)}')

overall_space_use = 0      # total artifact bytes found across all projects
overall_space_savings = 0  # total artifact bytes deleted across all projects
# Per-project pass: gather merge-request states and unmerged branches, then
# walk every CI job and delete artifacts that are expired AND whose source
# ref no longer needs them (MR merged/closed/unknown, or branch merged).
for project_id in project_ids:
    print_stderr(f'Processing project {project_id}:')

    # Map merge-request iid -> state (e.g. 'opened', 'merged', 'closed') for
    # all MRs of the project, following the API's 'next' pagination links.
    merge_request_url = f"https://{server}/api/v4/projects/{project_id}/merge_requests?scope=all&per_page=100&page=1"
    merge_requests = {}
    while merge_request_url:
        response = requests.get(
            merge_request_url,
            headers={
                'private-token': token,
            },
        )
        # Server error (500) or rate limit (429): wait and retry same page.
        if response.status_code in [500, 429]:
            print_stderr(f"Status {response.status_code}, retrying.")
            time.sleep(10)
            continue
        response.raise_for_status()
        response_json = response.json()
        for merge_request in response_json:
            iid = merge_request.get('iid', None)
            if iid:
                merge_requests[int(iid)] = merge_request['state']
        merge_request_url = response.links.get('next', {}).get('url', None)

    # Collect the names of branches that are not yet merged; artifacts of
    # jobs built from these branches must be kept.
    branch_url = f"https://{server}/api/v4/projects/{project_id}/repository/branches?per_page=100&page=1"
    unmerged_branches = []
    while branch_url:
        response = requests.get(
            branch_url,
            headers={
                'private-token': token,
            },
        )
        # Same retry policy as above.
        if response.status_code in [500, 429]:
            print_stderr(f"Status {response.status_code}, retrying.")
            time.sleep(10)
            continue
        response.raise_for_status()
        response_json = response.json()
        for branch in response_json:
            is_merged = branch['merged']
            if not is_merged:
                unmerged_branches.append(branch['name'])
        branch_url = response.links.get('next', {}).get('url', None)

    # Walk all jobs of the project, page by page, tallying artifact sizes and
    # deleting artifacts where safe.
    url = f"https://{server}/api/v4/projects/{project_id}/jobs?per_page=100&page=1"
    job_count = 0                # jobs inspected in this project
    artifact_count = 0           # artifacts found (excluding job.log)
    artifact_size = 0            # total bytes of those artifacts
    deleted_artifact_count = 0   # artifacts actually scheduled for deletion
    deleted_artifact_size = 0    # bytes of those deleted artifacts
    while url:
        response = requests.get(
            url,
            headers={
                'private-token': token,
            },
        )
        # Same retry policy as above.
        if response.status_code in [500, 429]:
            print_stderr(f"Status {response.status_code}, retrying.")
            time.sleep(10)
            continue
        response.raise_for_status()
        response_json = response.json()
        for job in response_json:
            job_count += 1
            artifacts = job.get('artifacts', None)
            artifacts_expire_at_string = job.get('artifacts_expire_at', None)
            artifacts_expire_at = None
            if artifacts_expire_at_string:
                # ISO 8601 timestamp; dateutil yields a tz-aware datetime
                # comparable against the tz-aware 'now'.
                artifacts_expire_at = parser.parse(artifacts_expire_at_string)
            has_expired_artifacts = False
            deleted_job_artifact_count = 0
            deleted_job_artifact_size = 0
            if artifacts:
                for artifact in artifacts:
                    # job.log entries are tiny and kept by GitLab regardless;
                    # only real artifact files are counted.
                    if artifact['filename'] != 'job.log':
                        size = artifact['size']
                        artifact_count += 1
                        artifact_size += size
                        # No expiry recorded ("keep forever") or expiry in
                        # the past both count as expired here.
                        if not artifacts_expire_at or artifacts_expire_at < now:
                            has_expired_artifacts = True
                            deleted_job_artifact_count += 1
                            deleted_job_artifact_size += size
            delete_artifacts = False
            if has_expired_artifacts:
                ref = job['ref']
                # MR pipelines run on refs like 'refs/merge-requests/<iid>/head'.
                merge_request_iid_match = re.search(r'refs\/merge-requests\/(\d+)\/head', ref)
                if merge_request_iid_match:
                    merge_request_iid = merge_request_iid_match.group(1)
                    if merge_request_iid:
                        merge_request_status = merge_requests.get(int(merge_request_iid))
                        # Delete when the MR is finished or unknown (None:
                        # iid not found among the project's MRs).
                        if merge_request_status in ['merged', 'closed', None]:
                            delete_artifacts = True
                            deleted_artifact_count += deleted_job_artifact_count
                            deleted_artifact_size += deleted_job_artifact_size
                # Branch job: delete unless the branch is still unmerged.
                elif ref not in unmerged_branches:
                    delete_artifacts = True
                    deleted_artifact_count += deleted_job_artifact_count
                    deleted_artifact_size += deleted_job_artifact_size
            if delete_artifacts:
                job_id = job['id']
                print_stderr(f"Processing job ID: {job_id}", end="")
                # Removes all artifacts of the job (the job.log is retained
                # by GitLab). Response status is reported but not enforced.
                delete_response = requests.delete(
                    f"https://{server}/api/v4/projects/{project_id}/jobs/{job_id}/artifacts",
                    headers={
                        'private-token': token,
                    },
                )
                # \033[K clears to end of line; \r keeps progress on one line.
                print_stderr(f" - status: {delete_response.status_code}\033[K", end = "\r")
        print_stderr(f'Processed page {url}.\033[K', end = "\r")
        url = response.links.get('next', {}).get('url', None)

    # Fold this project's numbers into the overall totals.
    overall_space_use += artifact_size
    overall_space_savings += deleted_artifact_size

    # Per-project summary on stderr.
    print_stderr()
    print_stderr(f'Jobs analysed: {job_count}');
    print_stderr(f'Pre artifact count: {artifact_count}');
    print_stderr(f'Pre artifact size [MB]: {artifact_size / (1024 * 1024)}')
    print_stderr(f'Post artifact count: {artifact_count - deleted_artifact_count}')
    print_stderr(f'Post artifact size [MB]: {(artifact_size - deleted_artifact_size) / (1024 * 1024)}')
    print_stderr()
# Final summary on stdout (per-project details went to stderr), so the
# overall numbers can be captured or piped independently of the progress log.
bytes_per_mb = 1024 * 1024
print(f'Overall artifact space savings [MB]: {overall_space_savings / bytes_per_mb}')
print(f'Overall artifact space usage after cleanup [MB]: {(overall_space_use - overall_space_savings) / bytes_per_mb}')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment