Last active
October 18, 2017 23:02
-
-
Save eseiver/c9b11fcffe37c0c58e4983a19f7f6f09 to your computer and use it in GitHub Desktop.
For renaming annotation articles in an existing PLOS XML corpus directory
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Rename annotation (correction) article files in an existing local copy of
corpusdir so that they follow the new naming scheme,
'plos.correction.<identifier>.xml'.
See https://github.com/PLOS/allofplos/issues/28
""" | |
import os | |
import re | |
from plos_corpus import listdir_nohidden, corpusdir | |
from plos_regex import validate_file | |
# Candidate files: every entry of corpusdir whose name/path contains the
# substring 'correction'.
# NOTE(review): os.rename() below is given `article` unchanged, so this
# presumably relies on listdir_nohidden() returning paths that are usable
# from the current working directory -- confirm against plos_corpus.
annotation_articles = [article for article in listdir_nohidden(corpusdir) if 'correction' in article]

count = 0
for article in annotation_articles:
    # Split on '/' and '.'; the last piece is the file extension, so the
    # second-to-last piece is the identifier that directly precedes it.
    # A raw string is used so that '\.' reaches the regex engine without
    # tripping Python's invalid-escape-sequence warning.
    parts = re.split(r'/|\.', article)
    new_filename = os.path.join(corpusdir, 'plos.correction.' + parts[-2] + '.xml')

    # Rename only when the target passes validate_file() and actually differs
    # from the current name (files already following the scheme are skipped).
    # NOTE(review): on POSIX os.rename() silently replaces an existing
    # destination -- confirm no two source files map to the same target.
    if validate_file(new_filename) and new_filename != article:
        os.rename(article, new_filename)
        count += 1

print('{} files renamed'.format(count))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment