Created
September 6, 2019 18:19
-
-
Save toby-p/6c0b4fb4897d7bd584de022763883b92 to your computer and use it in GitHub Desktop.
Search for strings in iterables and Pandas DataFrames
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def str_search(*substrings: str, iterable, exact_match: bool = False) -> list:
    """Case-insensitive search of an iterable for substrings.

    Args:
        substrings (str): strings to search for in the iterable. Non-string
            arguments are ignored. If no string substrings remain, every
            string in the iterable counts as a match.
        iterable (list, tuple, etc): iterable containing string objects to be
            searched. Non-string items are skipped.
        exact_match (bool): if True only return a single value that exactly
            matches (ignoring case) the substring supplied (therefore only
            works if 1 substring arg is supplied). Otherwise returns list of
            all partial matches.

    Returns:
        With ``exact_match=False``: a list of the distinct strings from
        ``iterable`` that contain every substring, in order of first
        appearance and with their original casing.
        With ``exact_match=True``: the first string equal to every substring,
        or an empty list when there is none.
    """
    # Deduplicate on the ORIGINAL spelling, not the lowercased one: keying on
    # the lowercased text would make "Foo" and "foo" overwrite each other and
    # silently drop one of them from the results.
    candidates = dict.fromkeys(
        item for item in iterable if isinstance(item, str)
    )
    needles = [s.lower() for s in substrings if isinstance(s, str)]

    if exact_match:
        for candidate in candidates:
            lowered = candidate.lower()
            if all(needle == lowered for needle in needles):
                return candidate
        return []

    return [
        candidate
        for candidate in candidates
        if all(needle in candidate.lower() for needle in needles)
    ]
def str_search_df(*keywords, df):
    """Search all columns of a Pandas DataFrame for strings matching keywords.

    Every column is compared through its string representation, so numeric
    and other non-string cells can match as well. A row is kept when at least
    one of its cells contains all keywords (case-insensitive).

    Args:
        keywords (str): substrings that a single cell must all contain.
        df (pandas.DataFrame): the frame to search.

    Returns:
        The rows of ``df`` with at least one matching cell.
    """
    # All-False row mask with one entry per row of df.
    mask = ~df.index.isin(list(df.index))
    for col in df.columns:
        as_text = df[col].astype(str)
        matches = str_search(*keywords, iterable=as_text)
        if len(matches):
            # Compare the stringified column, not the raw one: the matches
            # are strings, so raw non-string cells (ints, floats, dates)
            # would never be equal to them. Null cells are excluded so their
            # "nan"/"None" text representation cannot produce a hit.
            mask = mask | (as_text.isin(matches) & df[col].notna())
    return df[mask]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment