Created
September 6, 2012 21:41
-
-
Save jaytaylor/3660565 to your computer and use it in GitHub Desktop.
Convert camel-case to snake-case in python.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
Convert camel-case to snake-case in python. | |
e.g.: CamelCase -> snake_case | |
Relevant StackOverflow question: http://stackoverflow.com/a/1176023/293064 | |
""" | |
__author__ = 'Jay Taylor [@jtaylor]' | |
import re | |
# Pass 1: insert an underscore before every capitalized word (one uppercase
# letter followed by one or more lowercase letters) that has a character
# in front of it, e.g. "sOn" -> "s_On".
_underscorer1 = re.compile(r'(.)([A-Z][a-z]+)')
# Pass 2: split whatever lowercase-or-digit -> uppercase boundaries remain
# after pass 1, e.g. "nA" -> "n_A".
_underscorer2 = re.compile(r'([a-z0-9])([A-Z])')


def camelToSnake(s):
    """
    Convert a camel-case string to snake-case.

    The conversion runs two regex substitutions and then lower-cases the
    result:

    >>> camelToSnake('snakesOnAPlane')
    'snakes_on_a_plane'
    >>> camelToSnake('IPhoneHysteria')
    'i_phone_hysteria'

    Input that is already snake-case is returned unchanged.

    :param s: the camel-case (or Pascal-case) string to convert
    :return: the snake-case form of ``s``
    """
    subbed = _underscorer1.sub(r'\1_\2', s)
    return _underscorer2.sub(r'\1_\2', subbed).lower()
if __name__ == '__main__':
    # Minimal self-test; only runs when the file is executed as a script.
    assert camelToSnake('snakesOnAPlane') == 'snakes_on_a_plane'
    assert camelToSnake('SnakesOnAPlane') == 'snakes_on_a_plane'
    assert camelToSnake('snakes_on_a_plane') == 'snakes_on_a_plane'
    assert camelToSnake('IPhoneHysteria') == 'i_phone_hysteria'
    assert camelToSnake('iPhoneHysteria') == 'i_phone_hysteria'
    # The call form with a single argument prints the same text on both
    # Python 2 and Python 3; the bare print statement is Python 2 only.
    print('All tests passed.')
The lookaround solution is incomplete:
>>> camelToSnake('HTTPResponseCodeXYZ')
'h_t_t_p_response_code_x_y_z'
The lookaround solution is incomplete:
>>>
camelToSnake('HTTPResponseCodeXYZ')
'h_t_t_p_response_code_x_y_z'
I guess it's intended, since the tests assert this:
assert camelToSnake('IPhoneHysteria') == 'i_phone_hysteria'
instead of this
assert camelToSnake('IPhoneHysteria') == 'iphone_hysteria'
You can use the inflection library (see https://github.com/jpvanhal/inflection/blob/master/inflection.py#L395-L414), or this snippet taken from its code:
>>> regex1 = re.compile(r'([A-Z]+)([A-Z][a-z])')
>>> regex2 = re.compile(r'([a-z\d])([A-Z])')
>>> text = 'HTTPResponseCodeXYZ'
>>> regex2.sub(r'\1_\2', regex1.sub(r'\1_\2', text)).lower()
'http_response_code_xyz'
>>>
My variant of @dopstar's solution, adding an additional test to handle all-caps values.
# A run of capitals directly followed by a capitalized word, e.g. "HTTP" + "Po"
# in "HTTPPort"; the underscore goes before the last capital of the run.
LEADING_UC_CAMEL_SEGMENT = re.compile(r'([A-Z]+)([A-Z][a-z])')
# A lowercase letter or digit directly followed by a capital, e.g. "eC" in
# "ResponseCode".
UC_CAMEL_SEGMENT = re.compile(r'([a-z\d])([A-Z])')


def camel_to_snake(value: str) -> str:
    """
    Convert a CamelCasedString to a snake_cased_string.

    Separates capitalized words with underscores and lower-cases the result.
    Runs of multiple uppercase letters are not split, e.g. "HTTPPort" becomes
    "http_port". A value with no lowercase characters at all (e.g. "HTTP" or
    "SHA256SUM") is treated as all caps and is only lower-cased.

    :param value: a string value
    :return: the converted value
    """
    # value.upper() leaves the string unchanged exactly when it contains no
    # lowercase characters. (Comparing upper() against lower() would only be
    # true for strings with no cased characters, so it never detected caps.)
    if value == value.upper():
        return value.lower()
    acronyms_split = LEADING_UC_CAMEL_SEGMENT.sub(r'\1_\2', value)
    return UC_CAMEL_SEGMENT.sub(r'\1_\2', acronyms_split).lower()
I tried lots of approaches that didn't support all cases, so I wrote this one:
def to_snake(camel_input):
    """
    Convert free-form camel-case text to a single snake_case string.

    Steps:
      1. every '&' is spelled out as '_and_';
      2. an underscore is inserted before every ASCII uppercase letter
         except one at the very start of the string, then the text is
         lower-cased;
      3. the runs of ASCII lowercase letters and the runs of digits are
         extracted and joined with single underscores, so spaces,
         punctuation and repeated underscores all collapse away.

    Because step 2 splits before *every* capital, acronyms are split
    letter by letter (e.g. 'aa_BB_dd' -> 'aa_b_b_dd').

    :param camel_input: the text to convert
    :return: the snake_case form of ``camel_input``
    """
    camel_input = camel_input.replace('&', '_and_')
    camel_input = re.sub(r'(?<!^)(?=[A-Z])', '_', camel_input).lower()
    # The text is already lower-cased here, so only lowercase-letter runs
    # and digit runs can occur; uppercase alternatives would never match.
    words = re.findall(r'[a-z]+|\d+', camel_input)
    return '_'.join(words)
result for "'my p&l AndP&L aa_BB_dd (teSt) AndMy P&L andAlsoP&L andAlso p&l A B aA'":
print(to_snake('my p&l AndP&L aa_BB_dd (teSt) AndMy P&L andAlsoP&L andAlso p&l A B aA'))
my_p_and_l_and_p_and_l_aa_b_b_dd_te_st_and_my_p_and_l_and_also_p_and_l_and_also_p_and_l_a_b_a_a
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
# json calls object_hook with each decoded JSON object (a dict), not with
# individual keys, so the string converter is applied to every key here.
un = json.loads(
    json.dumps(profile),
    object_hook=lambda obj: {camel_to_snake(key): val for key, val in obj.items()},
)