Created
July 28, 2024 06:59
-
-
Save mattwang44/51f97e56b695758003117db6bac05b31 to your computer and use it in GitHub Desktop.
Visualize zh_TW translation progress of Python official docs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Visualize zh_TW translation progress of Python official docs | |
Run the script under the project root of the repo. | |
ref: https://github.com/python/python-docs-zh-tw | |
""" | |
import logging | |
from pathlib import Path | |
import pandas as pd | |
import polib | |
from git import Repo | |
# Directory that holds this script; the script is expected to live in the
# project root of the translation repository.  `Path(__file__)` alone is the
# script *file*, which is neither a valid repository location for `Repo()`
# nor a directory that can be searched for .po files.
root_path = Path(__file__).resolve().parent
repo = Repo(root_path)
# Remember the commit that was checked out before the script starts moving
# HEAD around, so it can be restored afterwards.
current_head = repo.head.commit
# History is sampled from the 3.12 branch.
repo.git.checkout('3.12')
def _get_po_paths(root: Path) -> list[Path]:
    """Find all .po files in the given path.

    Args:
        root: Either a single file or a directory to search recursively.

    Returns:
        A list of resolved paths.  If ``root`` is a file, it is returned as
        the only element (its suffix is not checked).  If ``root`` is a
        directory, every ``*.po`` file beneath it is returned, except files
        located inside a ``.git`` directory.  If ``root`` does not exist, an
        error is logged and an empty list is returned.
    """
    if not root.exists():
        logging.error("The path '%s' does not exist!", root.absolute())
        return []
    if root.is_file():
        return [root.resolve()]
    # Compare whole path components below `root` rather than doing a substring
    # match on the full path: a substring test would also discard files under
    # e.g. ".github/", or every file when the checkout directory itself has
    # ".git" in its name.
    return [
        p.resolve()
        for p in root.glob("**/*.po")
        if ".git" not in p.relative_to(root).parts
    ]
def _get_progress() -> float:
    """Compute the translation progress of the current working tree.

    Every .po file found under ``root_path`` is parsed with polib.  Files
    that fail to load with ``OSError`` are skipped and reported on stdout
    once the percentage has been computed.

    Returns:
        Translated entries as a percentage of all non-obsolete entries,
        rounded to two decimals.

    Raises:
        ZeroDivisionError: If no non-obsolete entries were counted at all.
    """
    failures = []
    translated_total = 0
    entry_total = 0
    for po_path in _get_po_paths(Path(root_path).resolve()):
        try:
            catalog = polib.pofile(po_path)
        except OSError:
            failures.append(f"{po_path} doesn't seem to be a .po file")
        else:
            # Obsolete entries are excluded from the denominator.
            entry_total += len(catalog) - len(catalog.obsolete_entries())
            translated_total += len(catalog.translated_entries())
    percentage = round(100 * translated_total / entry_total, 2)
    if failures:
        print(failures)
    return percentage
if __name__ == '__main__':
    # Walk the commit history, measure the translation progress at most once
    # per calendar day, then plot progress against commit date.
    memo = {}
    prev_commit_time = None
    # Materialize the history before the loop starts moving HEAD around.
    commits = list(repo.iter_commits())
    try:
        for commit in commits:
            commit_time = commit.committed_datetime
            # Only the first commit encountered for each day is sampled.
            if prev_commit_time and commit_time.date() == prev_commit_time.date():
                print(commit_time, commit, 'skipped')
                continue
            print(commit_time, commit)
            prev_commit_time = commit_time
            repo.git.checkout(commit)
            try:
                progress = _get_progress()
            except Exception as e:
                # Best effort: a commit that cannot be measured is reported
                # and left out of the plot instead of aborting the whole run.
                print(e)
            else:
                memo[commit] = (commit_time.timestamp(), progress)
    finally:
        # Always put the working tree back, even if a checkout fails midway.
        # `current_head` is a commit object, so HEAD ends up detached at the
        # commit that was checked out when the script started.
        repo.git.checkout(current_head)

    # plot
    if not memo:
        print('No progress data collected; nothing to plot.')
    else:
        # One row per sampled commit; explicit columns avoid the
        # transpose-and-rename step.
        df = pd.DataFrame.from_dict(
            memo, orient='index', columns=['timestamp', 'progress']
        )
        df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')
        df['progress'] = df['progress'].astype(float)
        df['date'] = df['timestamp'].dt.date
        ax = df.plot(x='date', y='progress', title='Translation progress based on commit history')
        ax.set_xlabel('Date')
        ax.set_ylabel('Progress (%)')
        ax.grid()
        # NOTE(review): nothing here displays or saves the figure; when run as
        # a plain script (outside a notebook / interactive session) a
        # show/savefig call is presumably needed -- confirm intended usage.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment