Last active
September 28, 2018 03:19
-
-
Save Martin91/725b6231cdde0e7c5d8f7726e1132996 to your computer and use it in GitHub Desktop.
Python itertools.groupby trap
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Demonstration of the `itertools.groupby` trap: groupby only merges *adjacent*
# items that share a key, so unsorted input yields several groups per key.
from itertools import groupby

data = [{'id': 1, 'value': 1}, {'id': 2, 'value': 2}, {'id': 2, 'value': 3}, {'id': 1, 'value': 4}]

# `group_id` rather than `id`, so the builtin `id` is not shadowed.
for group_id, ele in groupby(data, key=lambda e: e['id']):
    print("id: %d" % group_id)
    print("values: %s" % list(ele))
# => id: 1
# => values: [{'id': 1, 'value': 1}]
# => id: 2
# => values: [{'id': 2, 'value': 2}, {'id': 2, 'value': 3}]
# => id: 1
# => values: [{'id': 1, 'value': 4}]

# Each repeated key overwrites the previous dict entry, so earlier groups are lost.
{key: list(value) for key, value in groupby(data, key=lambda x: x['id'])}  # Dangerous!
# => {1: [{'id': 1, 'value': 4}], 2: [{'id': 2, 'value': 2}, {'id': 2, 'value': 3}]} # missing the first element!

# Sorting by the same key first makes equal keys adjacent, so nothing is lost.
{key: list(value) for key, value in groupby(sorted(data, key=lambda x: x['id']), key=lambda x: x['id'])}
# => {1: [{'id': 1, 'value': 1}, {'id': 1, 'value': 4}],
#     2: [{'id': 2, 'value': 2}, {'id': 2, 'value': 3}]}

# According to the official document, https://docs.python.org/2/library/itertools.html#itertools.groupby
#
# It generates a break or new group every time the value of the key function changes (which is why it is
# usually necessary to have sorted the data using the same key function)
#
# (╯‵□′)╯︵┻━┻
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def sorted_groupby(iterable, key=None):
    """
    Sort `iterable` by `key`, then group it with `itertools.groupby`.

    The built-in `itertools.groupby` starts a new group every time the value
    of the key function changes, so unsorted input yields several groups for
    the same key. This wrapper sorts with the same key function first, so
    that equal keys are adjacent before grouping.
    See https://docs.python.org/2/library/itertools.html#itertools.groupby

    :param iterable: the items to group; it is fully consumed by the sort.
    :param key: one-argument function computing the grouping key of an item,
        or None to group items by their own value.
    :return: the `itertools.groupby` iterator over the sorted items, yielding
        `(key, group_iterator)` pairs.

    Key values must be orderable, because the items are sorted by them.
    """
    # Imported here because the file shows no module-level `import itertools`.
    import itertools

    ordered = sorted(iterable, key=key)
    return itertools.groupby(ordered, key)
def groupby_to_dict(iterable, key=None):
    """
    Group `iterable` with the provided key function and return a dict that
    maps each key to the list of items sharing it.

    Items are collected straight into the dict in a single pass instead of
    sorting and running `itertools.groupby`. Sorting only makes equal keys
    adjacent when the keys are totally ordered; for other keys (e.g.
    frozensets) equal keys can stay apart, and building a dict from the
    resulting groups silently drops items. Direct accumulation has no such
    requirement: keys only need to be hashable.

    :param iterable: the items to group.
    :param key: one-argument function computing the grouping key of an item,
        or None to group items by their own value.
    :return: dict of `{key: [items...]}`. Items within a group keep their
        input order; keys are inserted in first-seen order.
    """
    groups = {}
    for item in iterable:
        group_key = item if key is None else key(item)
        # setdefault creates the list the first time a key is seen.
        groups.setdefault(group_key, []).append(item)
    return groups
# Usage examples for `sorted_groupby` and `groupby_to_dict`.

# Sample records for the first two examples (same data as the groupby-trap
# snippet), defined here so this snippet does not rely on another file.
data = [{'id': 1, 'value': 1}, {'id': 2, 'value': 2}, {'id': 2, 'value': 3}, {'id': 1, 'value': 4}]

for k, v in sorted_groupby(data, key=lambda x: x['id']):
    print("key: %d" % k)
    print("value: %s" % list(v))
# => key: 1
# => value: [{'id': 1, 'value': 1}, {'id': 1, 'value': 4}]
# => key: 2
# => value: [{'id': 2, 'value': 2}, {'id': 2, 'value': 3}]

groupby_to_dict(data, key=lambda x: x['id'])
# => {1: [{'id': 1, 'value': 1}, {'id': 1, 'value': 4}],
# =>  2: [{'id': 2, 'value': 2}, {'id': 2, 'value': 3}]}

# With no key function, items are grouped by their own value.
data = [1, 2, 3, 2, 3, 4, 1, 1, 2]
groupby_to_dict(data)
# => {1: [1, 1, 1], 2: [2, 2, 2], 3: [3, 3], 4: [4]}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment