Last active
January 18, 2020 18:50
-
-
Save MarkBaggett/c43cf2440ed63990f67613f2134f120b to your computer and use it in GitHub Desktop.
Determine a person's interests based on who they follow
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""Twit Interest will infer a person's interests based upon the most common words in the descriptions of those they follow"""
from twython import Twython | |
from collections import Counter | |
import sys | |
#Twython isn't a standard module. Run "python -m pip install twython" to install it. If pip is missing, run https://bootstrap.pypa.io/get-pip.py first.
# Replace both placeholder strings with your Twitter app's API key and API
# secret. They are kept as string literals so the file parses before the
# credentials have been filled in.
twit = Twython("<your twitter APP KEY HERE>", "<Your twitter Secret key here>")
#Need a key? Go to https://apps.twitter.com/app/new and create an app. Put anything you want for the values in the app.
#After the app is created, get the API Key and SECRET from the "KEYS AND ACCESS TOKENS" tab.
#WARNING: Twitter limits you to around 1500 queries a day. This tool makes one request per account the target follows in order to read each of their profiles.
# Upper-cased filler words that are never counted. A frozenset gives an O(1)
# membership test for every word of every description.
ignore_words = frozenset([
    'AND', "WE", "THAT", "THAN", "THEM", 'OF', "GET", "SO", "SOME", 'THE',
    'TO', 'FOR', 'MY', 'IN', 'IS', 'ARE', 'I', 'AT', 'ON', 'WITH', 'BY',
    'NOT', 'THAT', 'TWEETS', 'WE', 'ALL', 'FROM', 'OWN', 'ABOUT', 'DO', 'YOU',
    'OUR', 'THIS', 'THINGS', 'TEAM', 'YOUR', 'VIEWS', 'AN', 'TWITTER',
    'THOUGHTS', 'MOST', 'NOW', 'OPEN', 'NEW', 'THERE', 'AS', 'WHO', 'ONE',
    'OR', 'ALWAYS', 'IF', 'ME', 'THOSE', 'BE', 'AM', 'AROUND', 'LIKE', 'NO',
    'SEE', 'GROUP', 'EXPRESSED', 'JUST', 'EVERY', 'BUT', 'MORE', "WHY",
    "BECAUSE", "SOLELY", "WITHIN", "SINCE", "ITS", "LOT", "OTHER", "BOTH",
    "UP", "HERE", "BETTER",
])
# Punctuation removed from a description before it is split into words.
ignore_chars = """.,'!-/:?"'#@"""
# Built once here instead of on every loop iteration.
_strip_ignored = str.maketrans("", "", ignore_chars)


def interest_words(description):
    """Return the countable words of one profile description.

    Characters in ``ignore_chars`` are removed, the text is upper-cased and
    split on whitespace, and words that are in ``ignore_words`` or are a
    single character long are dropped.

    Args:
        description: The profile description text, or ``None``/empty when
            the account has none.

    Returns:
        A list of upper-cased words, empty when there is no description.
    """
    if not description:
        # A missing description used to crash on ``None.translate``.
        return []
    cleaned = description.translate(_strip_ignored)
    return [
        word
        for word in cleaned.upper().split()
        if word not in ignore_words and len(word) > 1
    ]


def main():
    """Count the words in the descriptions of the accounts a profile follows.

    Reads the profile name from ``sys.argv[1]``, fetches the ids of the
    accounts it follows, requests each account once, and prints the 1000
    most common description words with their counts. Exits with status 1
    after printing a usage message unless exactly one argument is given.
    """
    if len(sys.argv) != 2:
        print('Usage - Pass one argument. The twitter profile name. Do not include the @.\n Example: "python twit_interests.py cslewisdaily"')
        sys.exit(1)

    c = Counter()
    # ``or []`` keeps len() and the loop safe if the response has no "ids".
    following = twit.get_friends_ids(screen_name=sys.argv[1]).get("ids") or []
    num = len(following)
    print("Analyzing {} accounts...\n".format(num))
    for count, eachuser in enumerate(following):
        if count % 10 == 0:
            # Redraw the 50-column bar in place; flush because end="" leaves
            # the text in the stdout buffer otherwise.
            print("\r|{0:-<50}| {1:3.2f}%".format("X" * (50 * count // num), 100 * count / num), end="", flush=True)
        description = twit.show_user(id=eachuser).get("description")
        c.update(interest_words(description))
    # Finish the progress-bar line so the results start on a line of their own.
    print()
    print(c.most_common(1000))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment