omc8db · July 7, 2023 07:00
diff --git a/fed2wap.py b/fed2wap.py
 #!/usr/bin/env python3
 import sys, re

 # Every match must cover at least MIN_MATCH letters of the target and is
 # extended to at most MAX_MATCH letters when the reference allows it.
 MIN_MATCH=3
 MAX_MATCH=9
 # The lyrics of "Wet Ass Pussy" by Cardi B
 # Read through a context manager so the file handle is closed immediately.
 with open(sys.argv[1]) as target_file:
     target = target_file.read()
 # United States House Resolution 2617, Omnibus spending bill for FY2023
 # Lowercased once up front because the search patterns only use a-z.
 with open(sys.argv[2]) as reference_file:
     reference = reference_file.read().lower()

 def sanitize(s):
     """Return s lowercased with all non-word characters and underscores removed.

     Digits and non-ASCII letters are word characters and are therefore kept.
     """
     # Raw string: a backslash-W in a plain literal is an invalid escape sequence.
     return re.sub(r'[\W_]+', '', s.lower())

 # Sanitize the input by converting to lowercase and removing all non-letters.
 # Digits and non-ASCII characters are dropped too: the matching loop only
 # counts a-z when it advances through the target, so any other character
 # left in here would put the target out of step with what was matched.
 target = re.sub(r'[^a-z]+', '', target.lower())

 # Modifiers that can be added to regex letters.
 # WITH_NOISE lets any run of characters other than a-z and newline follow the
 # letter; OPTIONAL_WITH_NOISE additionally makes the letter itself optional.
 OPTIONAL_WITH_NOISE='?[^a-z\n]*'
 WITH_NOISE='[^a-z\n]*'
 def regex_modify(s, option):
     """Return s with option appended after every lowercase ASCII letter.

     Characters other than a-z are copied through unchanged.

     s      -- text whose letters should be decorated
     option -- regex fragment inserted verbatim after each letter
     """
     # A replacement function is used instead of a replacement template so that
     # backslashes inside option are inserted literally rather than being
     # interpreted as template escapes.
     return re.sub('([a-z])', lambda letter: letter.group(1) + option, s)

 # ANSI escape sequences used to highlight the matched text on the terminal
 END = '\033[0m'
 BOLD = '\033[1m'
 UNDERLINE = '\033[4m'
 PURPLE = '\033[95m'

 # Added to the match positions when the main loop reports them
 offset = 0

 def longest_match_regex(s):
     """Return a regex that matches the longest possible prefix of s.

     Every character of s opens an optional group that also contains the
     pattern for all following characters, so matching stops at the first
     character that is missing. Each character may be followed by any run of
     characters other than a-z and newline. An empty s yields an empty pattern.
     """
     # re.escape leaves letters and digits untouched but keeps a stray regex
     # metacharacter from changing the meaning of the pattern.
     opening = "".join(f"({re.escape(c)}[^a-z\n]*" for c in s)
     # One ")?" per character closes the nested optional groups.
     return opening + ")?" * len(s)

 # Consume the target piece by piece: each pass finds a reference line that
 # contains the next target letters (non-letters in between are ignored),
 # prints it with the match highlighted, and continues after the match.
 while reference and target:
     # The first MIN_MATCH letters are mandatory, the following ones (up to
     # MAX_MATCH in total) are matched for as long as they are present.
     searchstr = regex_modify(target[:MIN_MATCH], WITH_NOISE)
     searchstr += longest_match_regex(target[MIN_MATCH:MAX_MATCH])
     # NOTE: the pattern needs a newline on both sides of the line, so the
     # text before the first newline of the (remaining) reference and a final
     # line without a trailing newline are never matched.
     line = re.search(f"\n.*({searchstr}).*\n", reference, re.IGNORECASE)
     if line is None:
         break
     # Find boundaries of word match within the reference and within the line
     mstart, mend = line.span(1)
     line_text = line.group(0)
     rel_start = mstart - line.start()
     rel_end = mend - line.start()
     # Highlight exactly the reported span. Searching the line a second time
     # could mark a different occurrence than the one found above.
     bolded_line = (
         line_text[:rel_start]
         + BOLD + PURPLE + UNDERLINE + line_text[rel_start:rel_end] + END
         + line_text[rel_end:]
     ).strip()
     # Only letters count towards progress through the target
     matched_chars = len(re.sub('[^a-zA-Z]', '', line.group(1)))
     print(f"Omnibus spending bill, characters {offset + mstart} to {offset + mend}")
     print("\t" + bolded_line)
     target = target[matched_chars:]
     reference = reference[mend:]
     # The reference was cut at mend, so that is how far the origin moved
     offset += mend
	#!/usr/bin/env python3
	import sys, re

	# Every match must cover at least MIN_MATCH letters of the target and is
	# extended to at most MAX_MATCH letters when the reference allows it.
	MIN_MATCH=3
	MAX_MATCH=9
	# The lyrics of "Wet Ass Pussy" by Cardi B
	# Read through a context manager so the file handle is closed immediately.
	with open(sys.argv[1]) as target_file:
	    target = target_file.read()
	# United States House Resolution 2617, Omnibus spending bill for FY2023
	# Lowercased once up front because the search patterns only use a-z.
	with open(sys.argv[2]) as reference_file:
	    reference = reference_file.read().lower()

	def sanitize(s):
	    """Return s lowercased with all non-word characters and underscores removed.

	    Digits and non-ASCII letters are word characters and are therefore kept.
	    """
	    # Raw string: a backslash-W in a plain literal is an invalid escape sequence.
	    return re.sub(r'[\W_]+', '', s.lower())

	# Sanitize the input by converting to lowercase and removing all non-letters.
	# Digits and non-ASCII characters are dropped too: the matching loop only
	# counts a-z when it advances through the target, so any other character
	# left in here would put the target out of step with what was matched.
	target = re.sub(r'[^a-z]+', '', target.lower())

	# Modifiers that can be added to regex letters.
	# WITH_NOISE lets any run of characters other than a-z and newline follow the
	# letter; OPTIONAL_WITH_NOISE additionally makes the letter itself optional.
	OPTIONAL_WITH_NOISE='?[^a-z\n]*'
	WITH_NOISE='[^a-z\n]*'
	def regex_modify(s, option):
	    """Return s with option appended after every lowercase ASCII letter.

	    Characters other than a-z are copied through unchanged.

	    s      -- text whose letters should be decorated
	    option -- regex fragment inserted verbatim after each letter
	    """
	    # A replacement function is used instead of a replacement template so that
	    # backslashes inside option are inserted literally rather than being
	    # interpreted as template escapes.
	    return re.sub('([a-z])', lambda letter: letter.group(1) + option, s)

	# ANSI escape sequences used to highlight the matched text on the terminal
	END = '\033[0m'
	BOLD = '\033[1m'
	UNDERLINE = '\033[4m'
	PURPLE = '\033[95m'

	# Added to the match positions when the main loop reports them
	offset = 0

	def longest_match_regex(s):
	    """Return a regex that matches the longest possible prefix of s.

	    Every character of s opens an optional group that also contains the
	    pattern for all following characters, so matching stops at the first
	    character that is missing. Each character may be followed by any run of
	    characters other than a-z and newline. An empty s yields an empty pattern.
	    """
	    # re.escape leaves letters and digits untouched but keeps a stray regex
	    # metacharacter from changing the meaning of the pattern.
	    opening = "".join(f"({re.escape(c)}[^a-z\n]*" for c in s)
	    # One ")?" per character closes the nested optional groups.
	    return opening + ")?" * len(s)

	# Consume the target piece by piece: each pass finds a reference line that
	# contains the next target letters (non-letters in between are ignored),
	# prints it with the match highlighted, and continues after the match.
	while reference and target:
	    # The first MIN_MATCH letters are mandatory, the following ones (up to
	    # MAX_MATCH in total) are matched for as long as they are present.
	    searchstr = regex_modify(target[:MIN_MATCH], WITH_NOISE)
	    searchstr += longest_match_regex(target[MIN_MATCH:MAX_MATCH])
	    # ".*" on both sides lets the match sit anywhere inside the line.
	    # NOTE: the pattern needs a newline on both sides of the line, so the
	    # text before the first newline of the (remaining) reference and a final
	    # line without a trailing newline are never matched.
	    line = re.search(f"\n.*({searchstr}).*\n", reference, re.IGNORECASE)
	    if line is None:
	        break
	    # Find boundaries of word match within the reference and within the line
	    mstart, mend = line.span(1)
	    line_text = line.group(0)
	    rel_start = mstart - line.start()
	    rel_end = mend - line.start()
	    # Highlight exactly the reported span. Searching the line a second time
	    # could mark a different occurrence than the one found above.
	    bolded_line = (
	        line_text[:rel_start]
	        + BOLD + PURPLE + UNDERLINE + line_text[rel_start:rel_end] + END
	        + line_text[rel_end:]
	    ).strip()
	    # Only letters count towards progress through the target
	    matched_chars = len(re.sub('[^a-zA-Z]', '', line.group(1)))
	    print(f"Omnibus spending bill, characters {offset + mstart} to {offset + mend}")
	    print("\t" + bolded_line)
	    target = target[matched_chars:]
	    reference = reference[mend:]
	    # The reference was cut at mend, so that is how far the origin moved
	    offset += mend