M0r13n · December 26, 2024 10:54
diff --git a/fullmatch.py b/fullmatch.py
 import functools
 import os
 import pathlib
 import re
 from collections.abc import Callable

 def _translate(pat, STAR, QUESTION_MARK):
    """Translate one shell-style pattern into a list of regex fragments.

    `STAR` is the fragment emitted for `*` and `QUESTION_MARK` the fragment
    emitted for `?`.  A bracket expression becomes a regex character class,
    and every other character is passed through `re.escape`.  The fragments
    are returned as a list; the caller is responsible for joining them.
    """
    res = []
    add = res.append
    i, n = 0, len(pat)
    while i < n:
        c = pat[i]
        i = i+1
        if c == '*':
            # compress consecutive `*` into one
            # (identity check: only the STAR object appended here counts).
            if (not res) or res[-1] is not STAR:
                add(STAR)
        elif c == '?':
            add(QUESTION_MARK)
        elif c == '[':
            # Look for the closing `]`.  A `!` directly after `[` and a `]`
            # directly after that are stepped over first, so a `]` in the
            # first position cannot close the set.
            j = i
            if j < n and pat[j] == '!':
                j = j+1
            if j < n and pat[j] == ']':
                j = j+1
            while j < n and pat[j] != ']':
                j = j+1
            if j >= n:
                # No closing bracket: emit a literal `[`.  `i` is left
                # alone, so the following characters are handled normally.
                add('\\[')
            else:
                stuff = pat[i:j]
                if '-' not in stuff:
                    stuff = stuff.replace('\\', r'\\')
                else:
                    # Split the set at hyphens into chunks; the chunks are
                    # re-joined with `-` below.
                    chunks = []
                    # Start the hyphen search one character into the set
                    # (two if it is negated), so a hyphen in the first
                    # position is never found here.
                    k = i+2 if pat[i] == '!' else i+1
                    while True:
                        k = pat.find('-', k, j)
                        if k < 0:
                            break
                        chunks.append(pat[i:k])
                        i = k+1
                        # Resume the search three characters later, stepping
                        # over the character after this hyphen and the one
                        # following it.
                        k = k+3
                    chunk = pat[i:j]
                    if chunk:
                        chunks.append(chunk)
                    else:
                        # Nothing follows the last hyphen found: put that
                        # hyphen back onto the final chunk.
                        chunks[-1] += '-'
                    # Remove empty ranges -- invalid in RE.
                    # A range is empty when its start (last char of the left
                    # chunk) sorts after its end (first char of the right
                    # chunk); both endpoints are dropped and the chunks merged.
                    for k in range(len(chunks)-1, 0, -1):
                        if chunks[k-1][-1] > chunks[k][0]:
                            chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:]
                            del chunks[k]
                    # Escape backslashes and hyphens for set difference (--).
                    # Hyphens that create ranges shouldn't be escaped.
                    stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
                                     for s in chunks)
                # Escape set operations (&&, ~~ and ||).
                stuff = re.sub(r'([&~|])', r'\\\1', stuff)
                # Continue scanning after the closing bracket.
                i = j+1
                if not stuff:
                    # Empty range: never match.
                    add('(?!)')
                elif stuff == '!':
                    # Negated empty range: match any character.
                    add('.')
                else:
                    if stuff[0] == '!':
                        # Shell negation `!` becomes regex negation `^`.
                        stuff = '^' + stuff[1:]
                    elif stuff[0] in ('^', '['):
                        # Escape a leading `^` or `[` with a backslash.
                        stuff = '\\' + stuff
                    add(f'[{stuff}]')
        else:
            add(re.escape(c))
    # Every branch above must have consumed the pattern exactly.
    assert i == n
    return res


 def translate(pat, *, recursive=False, include_hidden=False, seps=None):
    """Build a regular expression equivalent to a shell-style path pattern.

    The pattern is split on path separators and each segment is translated
    separately; `*` and `?` never match a separator character.

    recursive: when true, a segment that is exactly '**' matches any number
        of path segments.
    include_hidden: when true, wildcards may match path segments that begin
        with a dot ('.').
    seps: separator characters used both to split the pattern and to match
        separators in the path.  When empty or omitted, os.path.sep and
        os.path.altsep (if set) are used.

    Returns the regular expression as a string, anchored at the end of input.
    """
    if not seps:
        seps = (os.path.sep, os.path.altsep) if os.path.altsep else os.path.sep
    escaped_seps = ''.join(re.escape(sep) for sep in seps)
    not_sep = f'[^{escaped_seps}]'
    # A lone separator needs no character class around it.
    any_sep = escaped_seps if len(seps) <= 1 else f'[{escaped_seps}]'

    if not include_hidden:
        # The first character of a wildcard-matched segment must not be a dot.
        one_last_segment = f'[^{escaped_seps}.]{not_sep}*'
        one_segment = f'{one_last_segment}{any_sep}'
        any_segments = f'(?:{one_segment})*'
        any_last_segments = f'{any_segments}(?:{one_last_segment})?'
    else:
        one_last_segment = f'{not_sep}+'
        one_segment = f'{one_last_segment}{any_sep}'
        any_segments = f'(?:.+{any_sep})?'
        any_last_segments = '.*'

    star = f'{not_sep}*'
    pieces = []
    segments = re.split(any_sep, pat)
    final = len(segments) - 1
    for pos, segment in enumerate(segments):
        is_last = pos == final
        if segment == '*':
            pieces.append(one_last_segment if is_last else one_segment)
            continue
        if recursive and segment == '**':
            if is_last:
                pieces.append(any_last_segments)
            elif segments[pos + 1] != '**':
                # Of a run of consecutive '**' segments only the last one
                # contributes to the expression.
                pieces.append(any_segments)
            continue
        if segment:
            if not include_hidden and segment[0] in '*?':
                # A leading wildcard must not match a leading dot.
                pieces.append(r'(?!\.)')
            pieces += _translate(segment, star, not_sep)
        if not is_last:
            pieces.append(any_sep)
    res = ''.join(pieces)
    return fr'(?s:{res})\Z'

 @functools.lru_cache(maxsize=512)
 def _compile_pattern(
    pat, sep, case_sensitive, recursive=True
 ) -> Callable[[str], re.Match[str] | None]:
    """Compile a glob pattern and return the compiled regex's `match` method.

    pat: glob-style pattern string.
    sep: separator character(s), forwarded to `translate` as `seps`.
    case_sensitive: when false the regex is compiled with re.IGNORECASE.
    recursive: forwarded to `translate`; enables '**' segments.

    The return value is the bound `match` method, not the pattern object:
    call it with a string to get a match object or None.  Because the
    translated expression is anchored at the end of input, a successful
    match always covers the whole string.  Results are cached per argument
    combination (up to 512 entries).
    """
    flags = re.NOFLAG if case_sensitive else re.IGNORECASE
    # Hidden (dot-prefixed) segments are always matchable by wildcards here.
    regex = translate(pat, recursive=recursive, include_hidden=True, seps=sep)
    return re.compile(regex, flags=flags).match


 def _pattern_str(path):
    """Return `path` as a string for pattern matching, mapping '.' to ''."""
    path_str = str(path)
    # An empty path stringifies as '.'; treating it as the empty string keeps
    # wildcards such as '*' from matching an empty path.
    return '' if path_str == '.' else path_str


 def full_match(path: pathlib.Path, pattern: str, case_sensitive=True) -> bool:
    """Return True if the whole of `path` matches the glob-style `pattern`.

    path: the path to test.
    pattern: glob-style pattern; a '**' segment matches any number of path
        segments.  The pattern is normalised by building a path object of the
        same class as `path` from it, so it is compared in the same form that
        `str(path)` produces.
    case_sensitive: when false, matching ignores case.

    An empty path and an empty (or '.') pattern are both treated as the empty
    string.
    """
    pattern_path = path.with_segments(pattern)
    # The public `parser` attribute was added in Python 3.13; fall back to
    # the private `_flavour` attribute on older versions.
    parser = getattr(pattern_path, 'parser', None) or pattern_path._flavour
    match = _compile_pattern(
        _pattern_str(pattern_path), parser.sep, case_sensitive, True)
    return match(_pattern_str(path)) is not None


 if __name__ == '__main__':
    from pathlib import Path

    # Self-test table.  Each entry is (expected, path, pattern) with an
    # optional fourth element giving the `case_sensitive` argument.  Entries
    # that are commented out are checks that were already disabled.
    cases = [
        # Single-part patterns
        (False, 'a/b.py', 'b.py'),
        (False, '/a/b.py', 'b.py'),
        (False, 'a.py', 'b.py'),
        (False, 'b/py', 'b.py'),
        (False, '/a.py', 'b.py'),
        (False, 'b.py/c', 'b.py'),

        # Wildcard relative pattern.
        (True, 'b.py', '*.py'),
        (False, 'a/b.py', '*.py'),
        (False, '/a/b.py', '*.py'),
        (False, 'b.pyc', '*.py'),
        (False, 'b./py', '*.py'),
        (False, 'b.py/c', '*.py'),

        # Multi-part relative pattern.
        (True, 'ab/c.py', 'a*/*.py'),
        (False, '/d/ab/c.py', 'a*/*.py'),
        (False, 'a.py', 'a*/*.py'),
        (False, '/dab/c.py', 'a*/*.py'),
        (False, 'ab/c.py/d', 'a*/*.py'),

        # Absolute pattern.
        (True, '/b.py', '/*.py'),
        (False, 'b.py', '/*.py'),
        (False, 'a/b.py', '/*.py'),
        (False, '/a/b.py', '/*.py'),

        # Multi-part absolute pattern.
        (True, '/a/b.py', '/a/*.py'),
        (False, '/ab.py', '/a/*.py'),
        (False, '/a/b/c.py', '/a/*.py'),

        # Multi-part glob-style patterns
        (True, 'a', '**'),
        (True, 'c.py', '**'),
        (True, 'a/b/c.py', '**'),
        (True, '/a/b/c.py', '**'),
        (True, '/a/b/c.py', '/**'),
        (True, '/a/b/c.py', '/a/**'),
        (True, '/a/b/c.py', '**/*.py'),
        (True, '/a/b/c.py', '/**/*.py'),
        (True, '/a/b/c.py', '/a/**/*.py'),
        (True, '/a/b/c.py', '/a/b/**/*.py'),
        (True, '/a/b/c.py', '/**/**/**/**/*.py'),

        (False, 'c.py', '**/a.py'),
        (False, 'c.py', 'c/**'),
        (False, 'a/b/c.py', '**/a'),
        (False, 'a/b/c.py', '**/a/b'),
        (False, 'a/b/c.py', '**/a/b/c'),
        (False, 'a/b/c.py', '**/a/b/c.'),
        (False, 'a/b/c.py', '**/a/b/c./**'),
        (False, 'a/b/c.py', '**/a/b/c./**'),
        (False, 'a/b/c.py', '/a/b/c.py/**'),
        (False, 'a/b/c.py', '/**/a/b/c.py'),

        # Case-sensitive flag
        (False, 'A.py', 'a.PY', True),
        (True, 'A.py', 'a.PY', False),
        (False, 'c:/a/B.Py', 'C:/A/*.pY', True),
        (True, '/a/b/c.py', '/A/*/*.Py', False),

        # Matching against empty path
        # (False, '', '*'),
        (True, '', '**'),
        # (False, '', '**/*'),

        # Matching with empty pattern
        # (True, '', ''),
        (True, '.', '.'),
        (False, '/', ''),
        (False, '/', '.'),
        (False, 'foo', ''),
        (False, 'foo', '.'),
    ]
    for expected, raw_path, pattern, *flag in cases:
        # Only pass `case_sensitive` when the entry specifies it.
        options = {'case_sensitive': flag[0]} if flag else {}
        assert full_match(Path(raw_path), pattern, **options) is expected
	import pathlib
	import re
	import os
	import functools

	def _translate(pat, STAR, QUESTION_MARK):
	res = []
	add = res.append
	i, n = 0, len(pat)
	while i < n:
	c = pat[i]
	i = i+1
	if c == '*':
	# compress consecutive `*` into one
	if (not res) or res[-1] is not STAR:
	add(STAR)
	elif c == '?':
	add(QUESTION_MARK)
	elif c == '[':
	j = i
	if j < n and pat[j] == '!':
	j = j+1
	if j < n and pat[j] == ']':
	j = j+1
	while j < n and pat[j] != ']':
	j = j+1
	if j >= n:
	add('\\[')
	else:
	stuff = pat[i:j]
	if '-' not in stuff:
	stuff = stuff.replace('\\', r'\\')
	else:
	chunks = []
	k = i+2 if pat[i] == '!' else i+1
	while True:
	k = pat.find('-', k, j)
	if k < 0:
	break
	chunks.append(pat[i:k])
	i = k+1
	k = k+3
	chunk = pat[i:j]
	if chunk:
	chunks.append(chunk)
	else:
	chunks[-1] += '-'
	# Remove empty ranges -- invalid in RE.
	for k in range(len(chunks)-1, 0, -1):
	if chunks[k-1][-1] > chunks[k][0]:
	chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:]
	del chunks[k]
	# Escape backslashes and hyphens for set difference (--).
	# Hyphens that create ranges shouldn't be escaped.
	stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
	for s in chunks)
	# Escape set operations (&&, ~~ and \|\|).
	stuff = re.sub(r'([&~\|])', r'\\\1', stuff)
	i = j+1
	if not stuff:
	# Empty range: never match.
	add('(?!)')
	elif stuff == '!':
	# Negated empty range: match any character.
	add('.')
	else:
	if stuff[0] == '!':
	stuff = '^' + stuff[1:]
	elif stuff[0] in ('^', '['):
	stuff = '\\' + stuff
	add(f'[{stuff}]')
	else:
	add(re.escape(c))
	assert i == n
	return res


	def translate(pat, *, recursive=False, include_hidden=False, seps=None):
	    """Translate a pathname with shell wildcards to a regular expression.

	    If `recursive` is true, the pattern segment '**' will match any number of
	    path segments.

	    If `include_hidden` is true, wildcards can match path segments beginning
	    with a dot ('.').

	    If a sequence of separator characters is given to `seps`, they will be
	    used to split the pattern into segments and match path separators. If not
	    given, os.path.sep and os.path.altsep (where available) are used.
	    """
	    if not seps:
	        if os.path.altsep:
	            seps = (os.path.sep, os.path.altsep)
	        else:
	            seps = os.path.sep
	    escaped_seps = ''.join(map(re.escape, seps))
	    # A lone separator needs no character class around it.
	    any_sep = f'[{escaped_seps}]' if len(seps) > 1 else escaped_seps
	    not_sep = f'[^{escaped_seps}]'
	    if include_hidden:
	        one_last_segment = f'{not_sep}+'
	        one_segment = f'{one_last_segment}{any_sep}'
	        any_segments = f'(?:.+{any_sep})?'
	        any_last_segments = '.*'
	    else:
	        # The first character of a wildcard-matched segment must not be a dot.
	        one_last_segment = f'[^{escaped_seps}.]{not_sep}*'
	        one_segment = f'{one_last_segment}{any_sep}'
	        any_segments = f'(?:{one_segment})*'
	        any_last_segments = f'{any_segments}(?:{one_last_segment})?'

	    results = []
	    parts = re.split(any_sep, pat)
	    last_part_idx = len(parts) - 1
	    for idx, part in enumerate(parts):
	        if part == '*':
	            results.append(one_segment if idx < last_part_idx else one_last_segment)
	        elif recursive and part == '**':
	            if idx < last_part_idx:
	                # Of a run of consecutive '**' segments only the last one
	                # contributes to the expression.
	                if parts[idx + 1] != '**':
	                    results.append(any_segments)
	            else:
	                results.append(any_last_segments)
	        else:
	            if part:
	                if not include_hidden and part[0] in '*?':
	                    # A leading wildcard must not match a leading dot.
	                    results.append(r'(?!\.)')
	                results.extend(_translate(part, f'{not_sep}*', not_sep))
	            if idx < last_part_idx:
	                results.append(any_sep)
	    res = ''.join(results)
	    return fr'(?s:{res})\Z'

	@functools.lru_cache(maxsize=512)
	def _compile_pattern(
	    pat, sep, case_sensitive, recursive=True
	) -> Callable[[str], re.Match[str] | None]:
	    """Compile a glob pattern and return the compiled regex's `match` method.

	    pat: glob-style pattern string.
	    sep: separator character(s), forwarded to `translate` as `seps`.
	    case_sensitive: when false the regex is compiled with re.IGNORECASE.
	    recursive: forwarded to `translate`; enables '**' segments.

	    The return value is the bound `match` method, not the pattern object:
	    call it with a string to get a match object or None.  Results are
	    cached per argument combination (up to 512 entries).
	    """
	    flags = re.NOFLAG if case_sensitive else re.IGNORECASE
	    # Hidden (dot-prefixed) segments are always matchable by wildcards here.
	    regex = translate(pat, recursive=recursive, include_hidden=True, seps=sep)
	    return re.compile(regex, flags=flags).match


	def _pattern_str(path):
	    """Return `path` as a string for pattern matching, mapping '.' to ''."""
	    path_str = str(path)
	    # An empty path stringifies as '.'; treating it as the empty string keeps
	    # wildcards such as '*' from matching an empty path.
	    return '' if path_str == '.' else path_str


	def full_match(path: pathlib.Path, pattern: str, case_sensitive=True) -> bool:
	    """Return True if the whole of `path` matches the glob-style `pattern`.

	    path: the path to test.
	    pattern: glob-style pattern; a '**' segment matches any number of path
	        segments.  The pattern is normalised by building a path object of the
	        same class as `path` from it, so it is compared in the same form that
	        `str(path)` produces.
	    case_sensitive: when false, matching ignores case.

	    An empty path and an empty (or '.') pattern are both treated as the empty
	    string.
	    """
	    pattern_path = path.with_segments(pattern)
	    # The public `parser` attribute was added in Python 3.13; fall back to
	    # the private `_flavour` attribute on older versions.
	    parser = getattr(pattern_path, 'parser', None) or pattern_path._flavour
	    match = _compile_pattern(
	        _pattern_str(pattern_path), parser.sep, case_sensitive, True)
	    return match(_pattern_str(path)) is not None


	if __name__ == '__main__':
	    # Self-test: runs only when the file is executed as a script.
	    from pathlib import Path

	    # Single-part patterns
	    assert not full_match(Path('a/b.py'), 'b.py')
	    assert not full_match(Path('/a/b.py'), 'b.py')
	    assert not full_match(Path('a.py'), 'b.py')
	    assert not full_match(Path('b/py'), 'b.py')
	    assert not full_match(Path('/a.py'), 'b.py')
	    assert not full_match(Path('b.py/c'), 'b.py')

	    # Wildcard relative pattern.
	    assert full_match(Path('b.py'), '*.py')
	    assert not full_match(Path('a/b.py'), '*.py')
	    assert not full_match(Path('/a/b.py'), '*.py')
	    assert not full_match(Path('b.pyc'), '*.py')
	    assert not full_match(Path('b./py'), '*.py')
	    assert not full_match(Path('b.py/c'), '*.py')

	    # Multi-part relative pattern.
	    assert full_match(Path('ab/c.py'), 'a*/*.py')
	    assert not full_match(Path('/d/ab/c.py'), 'a*/*.py')
	    assert not full_match(Path('a.py'), 'a*/*.py')
	    assert not full_match(Path('/dab/c.py'), 'a*/*.py')
	    assert not full_match(Path('ab/c.py/d'), 'a*/*.py')

	    # Absolute pattern.
	    assert full_match(Path('/b.py'), '/*.py')
	    assert not full_match(Path('b.py'), '/*.py')
	    assert not full_match(Path('a/b.py'), '/*.py')
	    assert not full_match(Path('/a/b.py'), '/*.py')

	    # Multi-part absolute pattern.
	    assert full_match(Path('/a/b.py'), '/a/*.py')
	    assert not full_match(Path('/ab.py'), '/a/*.py')
	    assert not full_match(Path('/a/b/c.py'), '/a/*.py')

	    # Multi-part glob-style patterns
	    assert full_match(Path('a'), '**')
	    assert full_match(Path('c.py'), '**')
	    assert full_match(Path('a/b/c.py'), '**')
	    assert full_match(Path('/a/b/c.py'), '**')
	    assert full_match(Path('/a/b/c.py'), '/**')
	    assert full_match(Path('/a/b/c.py'), '/a/**')
	    assert full_match(Path('/a/b/c.py'), '**/*.py')
	    assert full_match(Path('/a/b/c.py'), '/**/*.py')
	    assert full_match(Path('/a/b/c.py'), '/a/**/*.py')
	    assert full_match(Path('/a/b/c.py'), '/a/b/**/*.py')
	    assert full_match(Path('/a/b/c.py'), '/**/**/**/**/*.py')

	    assert not full_match(Path('c.py'), '**/a.py')
	    assert not full_match(Path('c.py'), 'c/**')
	    assert not full_match(Path('a/b/c.py'), '**/a')
	    assert not full_match(Path('a/b/c.py'), '**/a/b')
	    assert not full_match(Path('a/b/c.py'), '**/a/b/c')
	    assert not full_match(Path('a/b/c.py'), '**/a/b/c.')
	    assert not full_match(Path('a/b/c.py'), '**/a/b/c./**')
	    assert not full_match(Path('a/b/c.py'), '**/a/b/c./**')
	    assert not full_match(Path('a/b/c.py'), '/a/b/c.py/**')
	    assert not full_match(Path('a/b/c.py'), '/**/a/b/c.py')

	    # Case-sensitive flag
	    assert not full_match(Path('A.py'), 'a.PY', case_sensitive=True)
	    assert full_match(Path('A.py'), 'a.PY', case_sensitive=False)
	    assert not full_match(Path('c:/a/B.Py'), 'C:/A/*.pY', case_sensitive=True)
	    assert full_match(Path('/a/b/c.py'), '/A/*/*.Py', case_sensitive=False)

	    # Matching against empty path
	    # NOTE(review): the disabled checks below all involve an empty path or
	    # pattern; presumably they fail with the current implementation --
	    # confirm before re-enabling.
	    #assert not full_match(Path(''), '*')
	    assert full_match(Path(''), '**')
	    #assert not full_match(Path(''), '**/*')

	    # Matching with empty pattern
	    #assert full_match(Path(''), '')
	    assert full_match(Path('.'), '.')
	    assert not full_match(Path('/'), '')
	    assert not full_match(Path('/'), '.')
	    assert not full_match(Path('foo'), '')
	    assert not full_match(Path('foo'), '.')