Skip to content

Instantly share code, notes, and snippets.

@M0r13n
Created December 26, 2024 10:54
Show Gist options
  • Save M0r13n/ad2bf4846ac4d62a91422421977fb1b3 to your computer and use it in GitHub Desktop.
Save M0r13n/ad2bf4846ac4d62a91422421977fb1b3 to your computer and use it in GitHub Desktop.
Backport of pathlib's `full_match` to Python 3.10+
import functools
import os
import pathlib
import re
from typing import Callable, Optional
def _translate(pat, STAR, QUESTION_MARK):
res = []
add = res.append
i, n = 0, len(pat)
while i < n:
c = pat[i]
i = i+1
if c == '*':
# compress consecutive `*` into one
if (not res) or res[-1] is not STAR:
add(STAR)
elif c == '?':
add(QUESTION_MARK)
elif c == '[':
j = i
if j < n and pat[j] == '!':
j = j+1
if j < n and pat[j] == ']':
j = j+1
while j < n and pat[j] != ']':
j = j+1
if j >= n:
add('\\[')
else:
stuff = pat[i:j]
if '-' not in stuff:
stuff = stuff.replace('\\', r'\\')
else:
chunks = []
k = i+2 if pat[i] == '!' else i+1
while True:
k = pat.find('-', k, j)
if k < 0:
break
chunks.append(pat[i:k])
i = k+1
k = k+3
chunk = pat[i:j]
if chunk:
chunks.append(chunk)
else:
chunks[-1] += '-'
# Remove empty ranges -- invalid in RE.
for k in range(len(chunks)-1, 0, -1):
if chunks[k-1][-1] > chunks[k][0]:
chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:]
del chunks[k]
# Escape backslashes and hyphens for set difference (--).
# Hyphens that create ranges shouldn't be escaped.
stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
for s in chunks)
# Escape set operations (&&, ~~ and ||).
stuff = re.sub(r'([&~|])', r'\\\1', stuff)
i = j+1
if not stuff:
# Empty range: never match.
add('(?!)')
elif stuff == '!':
# Negated empty range: match any character.
add('.')
else:
if stuff[0] == '!':
stuff = '^' + stuff[1:]
elif stuff[0] in ('^', '['):
stuff = '\\' + stuff
add(f'[{stuff}]')
else:
add(re.escape(c))
assert i == n
return res
def translate(pat, *, recursive=False, include_hidden=False, seps=None):
    """Build a regular expression equivalent to a shell-style pathname pattern.

    The pattern is split into segments at path separators and each segment
    is translated on its own.

    With `recursive` true, a `**` segment matches any number of path
    segments.

    With `include_hidden` true, wildcards are allowed to match segments that
    start with a dot ('.').

    `seps` is the sequence of separator characters used both to split the
    pattern and to match separators in the path. When it is empty or not
    given, os.path.sep and (where defined) os.path.altsep are used.
    """
    if not seps:
        seps = (os.path.sep, os.path.altsep) if os.path.altsep else os.path.sep
    sep_chars = ''.join(re.escape(s) for s in seps)
    sep_re = f'[{sep_chars}]' if len(seps) > 1 else sep_chars
    non_sep_re = f'[^{sep_chars}]'

    # Fragments for a lone `*` segment (final / followed by a separator) and
    # for a `**` segment (followed by more segments / final).
    final_segment = (
        f'{non_sep_re}+' if include_hidden else f'[^{sep_chars}.]{non_sep_re}*'
    )
    inner_segment = f'{final_segment}{sep_re}'
    if include_hidden:
        deep_prefix = f'(?:.+{sep_re})?'
        deep_tail = '.*'
    else:
        deep_prefix = f'(?:{inner_segment})*'
        deep_tail = f'{deep_prefix}(?:{final_segment})?'

    segments = re.split(sep_re, pat)
    final_idx = len(segments) - 1
    fragments = []
    for idx, segment in enumerate(segments):
        is_final = idx == final_idx
        if segment == '*':
            fragments.append(final_segment if is_final else inner_segment)
            continue
        if recursive and segment == '**':
            if is_final:
                fragments.append(deep_tail)
            elif segments[idx + 1] != '**':
                # Only the last `**` of a consecutive run emits a fragment.
                fragments.append(deep_prefix)
            continue
        if segment:
            if not include_hidden and segment[0] in '*?':
                # A leading wildcard must not match a leading dot.
                fragments.append(r'(?!\.)')
            fragments += _translate(segment, f'{non_sep_re}*', non_sep_re)
        if not is_final:
            fragments.append(sep_re)

    body = ''.join(fragments)
    return fr'(?s:{body})\Z'
@functools.lru_cache(maxsize=512)
def _compile_pattern(pat, sep, case_sensitive, recursive=True) -> Callable[[str], Optional[re.Match]]:
    """Compile a glob pattern and return the compiled regex's `match` method.

    `pat` is translated with `translate`, using `sep` as the separator set
    and with hidden segments always included. Matching ignores case when
    `case_sensitive` is false. Results are memoised per argument tuple by
    `functools.lru_cache`.

    Returns the bound `match` method of the compiled pattern (a callable
    taking a string), not the pattern object itself.
    """
    # `re.NOFLAG` only exists on Python 3.11+; the literal 0 is the same
    # "no flags" value and keeps this working on 3.10.
    flags = 0 if case_sensitive else re.IGNORECASE
    regex = translate(pat, recursive=recursive, include_hidden=True, seps=sep)
    return re.compile(regex, flags=flags).match
def full_match(path: pathlib.Path, pattern: str, case_sensitive=True) -> bool:
    """Return True if the whole of `path` matches the glob-style `pattern`.

    The pattern is first parsed as a path of the same class as `path`, so
    both sides use the same separator and normalisation. `**` segments match
    any number of path segments. Matching ignores case when `case_sensitive`
    is false.
    """
    # `with_segments` exists only on Python 3.12+; on older versions fall
    # back to constructing the pattern with the path's own class.
    make_path = getattr(path, 'with_segments', None) or type(path)
    pattern_path = make_path(pattern)

    # The object describing path syntax is exposed as `parser` on newer
    # Python versions and as the private `_flavour` on older ones; `sep` is
    # read from whichever is present.
    flavour = getattr(pattern_path, 'parser', None)
    if flavour is None:
        flavour = pattern_path._flavour

    # pathlib renders the empty path as '.'. Map it back to '' on both
    # sides so that an empty path only matches patterns that accept nothing
    # (e.g. '' or '**') and not '*'.
    path_str = str(path)
    if path_str == '.':
        path_str = ''
    pattern_str = str(pattern_path)
    if pattern_str == '.':
        pattern_str = ''

    match = _compile_pattern(pattern_str, flavour.sep, case_sensitive, True)
    return match(path_str) is not None
if __name__ == '__main__':
    from pathlib import Path

    def check(raw_path, pattern, expected, **options):
        """Assert that full_match(Path(raw_path), pattern, **options) is `expected`."""
        outcome = full_match(Path(raw_path), pattern, **options)
        assert outcome is expected, (raw_path, pattern, options)

    # Single-part patterns
    check('a/b.py', 'b.py', False)
    check('/a/b.py', 'b.py', False)
    check('a.py', 'b.py', False)
    check('b/py', 'b.py', False)
    check('/a.py', 'b.py', False)
    check('b.py/c', 'b.py', False)

    # Wildcard relative pattern.
    check('b.py', '*.py', True)
    check('a/b.py', '*.py', False)
    check('/a/b.py', '*.py', False)
    check('b.pyc', '*.py', False)
    check('b./py', '*.py', False)
    check('b.py/c', '*.py', False)

    # Multi-part relative pattern.
    check('ab/c.py', 'a*/*.py', True)
    check('/d/ab/c.py', 'a*/*.py', False)
    check('a.py', 'a*/*.py', False)
    check('/dab/c.py', 'a*/*.py', False)
    check('ab/c.py/d', 'a*/*.py', False)

    # Absolute pattern.
    check('/b.py', '/*.py', True)
    check('b.py', '/*.py', False)
    check('a/b.py', '/*.py', False)
    check('/a/b.py', '/*.py', False)

    # Multi-part absolute pattern.
    check('/a/b.py', '/a/*.py', True)
    check('/ab.py', '/a/*.py', False)
    check('/a/b/c.py', '/a/*.py', False)

    # Multi-part glob-style patterns
    check('a', '**', True)
    check('c.py', '**', True)
    check('a/b/c.py', '**', True)
    check('/a/b/c.py', '**', True)
    check('/a/b/c.py', '/**', True)
    check('/a/b/c.py', '/a/**', True)
    check('/a/b/c.py', '**/*.py', True)
    check('/a/b/c.py', '/**/*.py', True)
    check('/a/b/c.py', '/a/**/*.py', True)
    check('/a/b/c.py', '/a/b/**/*.py', True)
    check('/a/b/c.py', '/**/**/**/**/*.py', True)
    check('c.py', '**/a.py', False)
    check('c.py', 'c/**', False)
    check('a/b/c.py', '**/a', False)
    check('a/b/c.py', '**/a/b', False)
    check('a/b/c.py', '**/a/b/c', False)
    check('a/b/c.py', '**/a/b/c.', False)
    check('a/b/c.py', '**/a/b/c./**', False)
    check('a/b/c.py', '/a/b/c.py/**', False)
    check('a/b/c.py', '/**/a/b/c.py', False)

    # Case-sensitive flag
    check('A.py', 'a.PY', False, case_sensitive=True)
    check('A.py', 'a.PY', True, case_sensitive=False)
    check('c:/a/B.Py', 'C:/A/*.pY', False, case_sensitive=True)
    check('/a/b/c.py', '/A/*/*.Py', True, case_sensitive=False)

    # Matching against empty path
    # (cases left commented out are disabled, as in the original self-test)
    # check('', '*', False)
    check('', '**', True)
    # check('', '**/*', False)

    # Matching with empty pattern
    # check('', '', True)
    check('.', '.', True)
    check('/', '', False)
    check('/', '.', False)
    check('foo', '', False)
    check('foo', '.', False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment