Created
April 16, 2020 18:32
-
-
Save mkweskin/b0203c9d9ec0a1f748f4d2ec63e8e599 to your computer and use it in GitHub Desktop.
A general utility to do a batch find/replace. Takes a translation file with the find/replace pairs and a file to be translated.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" | |
Author: Matthew Kweskin, github: @mkweskin | |
A general utility that reads a delimited translation file with two columns
(text to find, replacement text) and applies those replacements to the
contents of a text file.
""" | |
import argparse | |
from os import path | |
import sys | |
import re | |
import codecs | |
# Stop immediately under Python 2 with a hint on how to get Python 3.
if sys.version_info.major < 3:
    raise Exception("This script requires python3. One way to install this is with miniconda3: https://docs.conda.io/en/latest/miniconda.html")
def unescaped_str(arg_str):
    """
    Interpret backslash escape sequences in a command-line argument.

    Used as an argparse ``type=`` converter so that a user can type a
    backslash followed by ``t`` on the command line and get a real tab
    character as the delimiter.

    Args:
        arg_str: The raw argument text (converted with ``str()``).

    Returns:
        The text with escape sequences (tab, newline, hex and unicode
        escapes, ...) replaced by the characters they denote. Characters that
        are not part of an escape sequence are returned unchanged.

    Raises:
        UnicodeDecodeError: If the text contains a malformed escape sequence,
            such as a single trailing backslash.
    """
    # The unicode_escape codec reads every non-escape byte as Latin-1, so the
    # text must reach it as Latin-1 bytes. Anything outside Latin-1 is turned
    # into a backslash escape first, which the codec then decodes back to the
    # original character. Decoding the str directly would corrupt non-ASCII
    # delimiters.
    raw = str(arg_str).encode('latin-1', 'backslashreplace')
    return raw.decode('unicode_escape')
def get_args(argv=None):
    """
    Build the command-line parser and parse the arguments.

    Args:
        argv: Optional list of argument strings to parse. When left as None,
            argparse reads the process command line, which is what the
            script does when run normally.

    Returns:
        An argparse.Namespace with the attributes TRANSLATION_FILE, INPUT,
        output, overwrite, delimiter, quiet, trim and reverse.
    """
    parser = argparse.ArgumentParser(
        description="Text replacement utility that gets values to find/replace from a translation file (tab separated file as default, other delimiters can be specified.)",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "TRANSLATION_FILE",
        help='File with the pairs of values to find/replace. Note: if there are >1 delimiter in a line, the text following the final delimiter character will be used as the replace value.',
    )
    parser.add_argument("INPUT", help='File to perform the find/replace on.')
    parser.add_argument(
        "--output",
        help='Output file. Use \'STDOUT\' to output to screen.',
        default='STDOUT',
    )
    parser.add_argument(
        "--overwrite",
        action="store_true",
        help='Automatically overwrite the output file if it\'s already present',
    )
    # unescaped_str turns the two typed characters backslash + t into a tab.
    parser.add_argument(
        "--delimiter",
        help='Character(s) to separate the fields. Use \'\\t\' for tab (default).',
        type=unescaped_str,
        default='\t',
    )
    parser.add_argument("--quiet", action="store_true", help='Disable warning messages')
    parser.add_argument(
        "--trim",
        action="store_true",
        help='Trim out extra white space before and after each find/replace pair',
    )
    parser.add_argument(
        "--reverse",
        action="store_true",
        help='Reverse columns 1 and 2 in the translation file',
    )
    return parser.parse_args(argv)
def file_checks(args):
    """
    Validate the input, translation and output paths before any work is done.

    Args:
        args: Parsed arguments; reads the INPUT, TRANSLATION_FILE, output and
            overwrite attributes.

    Raises:
        Exception: If the input file, the translation file, or the directory
            that should hold the output file does not exist.

    If the output file already exists and ``args.overwrite`` is false, the
    user is asked for confirmation on the terminal; any answer other than
    ``y`` ends the script through ``sys.exit()``.
    """
    if not path.exists(args.INPUT):
        raise Exception("Input file does not exist")
    if not path.exists(args.TRANSLATION_FILE):
        raise Exception("Translation file does not exist")

    # Writing to the screen involves no file or directory, so there is
    # nothing left to check. Returning here also keeps a non-path value from
    # ever being handed to path.exists().
    if args.output == 'STDOUT':
        return

    if path.exists(args.output) and not args.overwrite:
        answer = input("Output file exists. (use --overwrite to Automatically overwrite the output file)\nOverwrite [y/n]? ")
        if answer.lower() != 'y':
            print("Exiting script.")
            sys.exit()

    # An empty dirname means the output goes into the current directory.
    outputdir = path.dirname(args.output)
    if outputdir != "" and not path.exists(outputdir):
        raise Exception("Output directory does not exist")
def read_translation(translation_file, quiet, trim, delim, reverse):
    """
    Read a translation file into a dictionary of find -> replace pairs.

    Each line is split once at its LAST occurrence of ``delim``: the text to
    the left is the search string and the text to the right is the
    replacement (swapped when ``reverse`` is true).

    Args:
        translation_file: Path of the delimited file to read.
        quiet: When true, suppress the warnings written to stderr.
        trim: When true, strip leading/trailing white space from both fields.
        delim: Delimiter string separating the two fields.
        reverse: When true, use the right field as the search string and the
            left field as the replacement.

    Returns:
        dict mapping each search string to its replacement. If a search
        string occurs more than once, the last line wins.

    A line without the delimiter is reported on stderr and ends the script
    with exit status 1.
    """
    translations = {}
    with open(translation_file) as translation:
        for line in translation:
            line = line.rstrip('\r\n')
            line_split = line.rsplit(delim, 1)
            if len(line_split) != 2:
                sys.stderr.write("ERROR: line is missing the delimiter character: \"" + str(line_split) + "\"\n")
                # Non-zero status so calling shells/pipelines see the failure.
                sys.exit(1)
            if reverse:
                line_split = line_split[::-1]
            if trim:
                line_split = [field.strip() for field in line_split]
            key, val = line_split
            if not quiet:
                if key in translations:
                    sys.stderr.write("Warning: the text to find \"" + str(key) + "\" is already defined as \"" + str(translations[key]) + "\", overwritting.\n")
                if delim in key:
                    sys.stderr.write("Warning: the line \"" + line + "\" contains an extra delimiter. All text to the left of the last delimiter will be used as the search string.\n")
            translations[key] = val
    return translations
def do_find_replace(infile, outfile, re_dict):
    """
    Apply the find/replace pairs to every line of the input.

    All search strings are matched literally (regex metacharacters are
    escaped) in a single pass per line, so text produced by one replacement
    is never re-scanned by another. When several search strings could match
    at the same position, the one that comes first in ``re_dict`` is used.

    Args:
        infile: Iterable of text lines (e.g. an open text file).
        outfile: Writable text stream, or None to write to ``sys.stdout``.
        re_dict: dict mapping each search string to its replacement.

    Empty search strings are ignored, and with no usable search strings the
    input is copied through unchanged.
    """
    sink = sys.stdout if outfile is None else outfile

    # An empty search string would match at every position of every line
    # and splice its replacement between all characters; skip such entries.
    replacements = {find: repl for find, repl in re_dict.items() if find != ""}

    # With nothing to search for, the joined pattern would be empty and the
    # replacement lookup would fail, so just pass the text through.
    if not replacements:
        for line in infile:
            sink.write(line)
        return

    pattern = re.compile("|".join(re.escape(find) for find in replacements))
    for line in infile:
        # The matched text is exactly one of the literal search strings, so
        # it can be looked up directly. A callable is used so backslashes in
        # the replacement are not treated as regex group references.
        sink.write(pattern.sub(lambda m: replacements[m.group(0)], line))
def main():
    """
    Command-line entry point: parse the arguments, validate the paths, load
    the translation table and run the find/replace over the input file.
    """
    options = get_args()
    file_checks(options)
    translations = read_translation(
        options.TRANSLATION_FILE,
        options.quiet,
        options.trim,
        options.delimiter,
        options.reverse,
    )
    with open(options.INPUT) as source:
        # Passing None as the output makes do_find_replace write to the screen.
        if options.output == "STDOUT":
            do_find_replace(source, None, translations)
            return
        with open(options.output, 'w') as destination:
            do_find_replace(source, destination, translations)
# Run the tool only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment