Created
December 26, 2024 10:54
-
-
Save M0r13n/ad2bf4846ac4d62a91422421977fb1b3 to your computer and use it in GitHub Desktop.
Backport of pathlibs `full_match` to Python 3.10+
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pathlib | |
import re | |
import os | |
import functools | |
def _translate(pat, STAR, QUESTION_MARK): | |
res = [] | |
add = res.append | |
i, n = 0, len(pat) | |
while i < n: | |
c = pat[i] | |
i = i+1 | |
if c == '*': | |
# compress consecutive `*` into one | |
if (not res) or res[-1] is not STAR: | |
add(STAR) | |
elif c == '?': | |
add(QUESTION_MARK) | |
elif c == '[': | |
j = i | |
if j < n and pat[j] == '!': | |
j = j+1 | |
if j < n and pat[j] == ']': | |
j = j+1 | |
while j < n and pat[j] != ']': | |
j = j+1 | |
if j >= n: | |
add('\\[') | |
else: | |
stuff = pat[i:j] | |
if '-' not in stuff: | |
stuff = stuff.replace('\\', r'\\') | |
else: | |
chunks = [] | |
k = i+2 if pat[i] == '!' else i+1 | |
while True: | |
k = pat.find('-', k, j) | |
if k < 0: | |
break | |
chunks.append(pat[i:k]) | |
i = k+1 | |
k = k+3 | |
chunk = pat[i:j] | |
if chunk: | |
chunks.append(chunk) | |
else: | |
chunks[-1] += '-' | |
# Remove empty ranges -- invalid in RE. | |
for k in range(len(chunks)-1, 0, -1): | |
if chunks[k-1][-1] > chunks[k][0]: | |
chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:] | |
del chunks[k] | |
# Escape backslashes and hyphens for set difference (--). | |
# Hyphens that create ranges shouldn't be escaped. | |
stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-') | |
for s in chunks) | |
# Escape set operations (&&, ~~ and ||). | |
stuff = re.sub(r'([&~|])', r'\\\1', stuff) | |
i = j+1 | |
if not stuff: | |
# Empty range: never match. | |
add('(?!)') | |
elif stuff == '!': | |
# Negated empty range: match any character. | |
add('.') | |
else: | |
if stuff[0] == '!': | |
stuff = '^' + stuff[1:] | |
elif stuff[0] in ('^', '['): | |
stuff = '\\' + stuff | |
add(f'[{stuff}]') | |
else: | |
add(re.escape(c)) | |
assert i == n | |
return res | |
def translate(pat, *, recursive=False, include_hidden=False, seps=None): | |
"""Translate a pathname with shell wildcards to a regular expression. | |
If `recursive` is true, the pattern segment '**' will match any number of | |
path segments. | |
If `include_hidden` is true, wildcards can match path segments beginning | |
with a dot ('.'). | |
If a sequence of separator characters is given to `seps`, they will be | |
used to split the pattern into segments and match path separators. If not | |
given, os.path.sep and os.path.altsep (where available) are used. | |
""" | |
if not seps: | |
if os.path.altsep: | |
seps = (os.path.sep, os.path.altsep) | |
else: | |
seps = os.path.sep | |
escaped_seps = ''.join(map(re.escape, seps)) | |
any_sep = f'[{escaped_seps}]' if len(seps) > 1 else escaped_seps | |
not_sep = f'[^{escaped_seps}]' | |
if include_hidden: | |
one_last_segment = f'{not_sep}+' | |
one_segment = f'{one_last_segment}{any_sep}' | |
any_segments = f'(?:.+{any_sep})?' | |
any_last_segments = '.*' | |
else: | |
one_last_segment = f'[^{escaped_seps}.]{not_sep}*' | |
one_segment = f'{one_last_segment}{any_sep}' | |
any_segments = f'(?:{one_segment})*' | |
any_last_segments = f'{any_segments}(?:{one_last_segment})?' | |
results = [] | |
parts = re.split(any_sep, pat) | |
last_part_idx = len(parts) - 1 | |
for idx, part in enumerate(parts): | |
if part == '*': | |
results.append(one_segment if idx < last_part_idx else one_last_segment) | |
elif recursive and part == '**': | |
if idx < last_part_idx: | |
if parts[idx + 1] != '**': | |
results.append(any_segments) | |
else: | |
results.append(any_last_segments) | |
else: | |
if part: | |
if not include_hidden and part[0] in '*?': | |
results.append(r'(?!\.)') | |
results.extend(_translate(part, f'{not_sep}*', not_sep)) | |
if idx < last_part_idx: | |
results.append(any_sep) | |
res = ''.join(results) | |
return fr'(?s:{res})\Z' | |
@functools.lru_cache(maxsize=512) | |
def _compile_pattern(pat, sep, case_sensitive, recursive=True) -> re.Pattern: | |
flags = re.NOFLAG if case_sensitive else re.IGNORECASE | |
regex = translate(pat, recursive=recursive, include_hidden=True, seps=sep) | |
return re.compile(regex, flags=flags).match | |
def full_match(path: pathlib.Path, pattern:str, case_sensitive=True) -> bool: | |
pattern_path = path.with_segments(pattern) | |
f = pattern_path._flavour | |
match = _compile_pattern(pattern, f.sep, case_sensitive, True) | |
return match(str(path)) is not None | |
if __name__ == '__main__': | |
from pathlib import Path | |
# Single-part patterns | |
assert not full_match(Path('a/b.py'), 'b.py') | |
assert not full_match(Path('/a/b.py'), 'b.py') | |
assert not full_match(Path('a.py'), 'b.py') | |
assert not full_match(Path('b/py'), 'b.py') | |
assert not full_match(Path('/a.py'), 'b.py') | |
assert not full_match(Path('b.py/c'), 'b.py') | |
# Wildcard relative pattern. | |
assert full_match(Path('b.py'), '*.py') | |
assert not full_match(Path('a/b.py'), '*.py') | |
assert not full_match(Path('/a/b.py'), '*.py') | |
assert not full_match(Path('b.pyc'), '*.py') | |
assert not full_match(Path('b./py'), '*.py') | |
assert not full_match(Path('b.py/c'), '*.py') | |
# Multi-part relative pattern. | |
assert full_match(Path('ab/c.py'), 'a*/*.py') | |
assert not full_match(Path('/d/ab/c.py'), 'a*/*.py') | |
assert not full_match(Path('a.py'), 'a*/*.py') | |
assert not full_match(Path('/dab/c.py'), 'a*/*.py') | |
assert not full_match(Path('ab/c.py/d'), 'a*/*.py') | |
# Absolute pattern. | |
assert full_match(Path('/b.py'), '/*.py') | |
assert not full_match(Path('b.py'), '/*.py') | |
assert not full_match(Path('a/b.py'), '/*.py') | |
assert not full_match(Path('/a/b.py'), '/*.py') | |
# Multi-part absolute pattern. | |
assert full_match(Path('/a/b.py'), '/a/*.py') | |
assert not full_match(Path('/ab.py'), '/a/*.py') | |
assert not full_match(Path('/a/b/c.py'), '/a/*.py') | |
# Multi-part glob-style patterns | |
assert full_match(Path('a'), '**') | |
assert full_match(Path('c.py'), '**') | |
assert full_match(Path('a/b/c.py'), '**') | |
assert full_match(Path('/a/b/c.py'), '**') | |
assert full_match(Path('/a/b/c.py'), '/**') | |
assert full_match(Path('/a/b/c.py'), '/a/**') | |
assert full_match(Path('/a/b/c.py'), '**/*.py') | |
assert full_match(Path('/a/b/c.py'), '/**/*.py') | |
assert full_match(Path('/a/b/c.py'), '/a/**/*.py') | |
assert full_match(Path('/a/b/c.py'), '/a/b/**/*.py') | |
assert full_match(Path('/a/b/c.py'), '/**/**/**/**/*.py') | |
assert not full_match(Path('c.py'), '**/a.py') | |
assert not full_match(Path('c.py'), 'c/**') | |
assert not full_match(Path('a/b/c.py'), '**/a') | |
assert not full_match(Path('a/b/c.py'), '**/a/b') | |
assert not full_match(Path('a/b/c.py'), '**/a/b/c') | |
assert not full_match(Path('a/b/c.py'), '**/a/b/c.') | |
assert not full_match(Path('a/b/c.py'), '**/a/b/c./**') | |
assert not full_match(Path('a/b/c.py'), '**/a/b/c./**') | |
assert not full_match(Path('a/b/c.py'), '/a/b/c.py/**') | |
assert not full_match(Path('a/b/c.py'), '/**/a/b/c.py') | |
# Case-sensitive flag | |
assert not full_match(Path('A.py'), 'a.PY', case_sensitive=True) | |
assert full_match(Path('A.py'), 'a.PY', case_sensitive=False) | |
assert not full_match(Path('c:/a/B.Py'), 'C:/A/*.pY', case_sensitive=True) | |
assert full_match(Path('/a/b/c.py'), '/A/*/*.Py', case_sensitive=False) | |
# Matching against empty path | |
#assert not full_match(Path(''), '*') | |
assert full_match(Path(''), '**') | |
#assert not full_match(Path(''), '**/*') | |
# Matching with empty pattern | |
#assert full_match(Path(''), '') | |
assert full_match(Path('.'), '.') | |
assert not full_match(Path('/'), '') | |
assert not full_match(Path('/'), '.') | |
assert not full_match(Path('foo'), '') | |
assert not full_match(Path('foo'), '.') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment