Created
October 9, 2017 20:39
-
-
Save sainipray/594ebbe94f2518caf2afdb57bfb8691e to your computer and use it in GitHub Desktop.
Scrape data from Stackoverflow.com using the Python BeautifulSoup4 library
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
from __future__ import unicode_literals | |
import bs4 | |
import requests | |
def get_data(url, timeout=10):
    """Download *url* and return its body parsed as a BeautifulSoup tree.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds handed to ``requests.get`` as its ``timeout``.
            Without a timeout a stalled server would block the call
            indefinitely.

    Returns:
        A ``bs4.BeautifulSoup`` object built from the response text with
        the ``html.parser`` backend.

    Raises:
        requests.HTTPError: If ``raise_for_status`` rejects the response.
        requests.Timeout: If the server does not answer within *timeout*.
    """
    res = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    res.raise_for_status()
    return bs4.BeautifulSoup(res.text, 'html.parser')
def get_latest_questions():
    """Print the question entries found on the Stack Overflow front page.

    Fetches https://stackoverflow.com through ``get_data`` and prints one
    numbered line per question title link, together with the text of the
    matching "started" link and user link.

    NOTE(review): the CSS selectors reflect the page markup the script was
    written against -- confirm they still match the live site.
    """
    # https, like the other fetchers in this file.
    soup = get_data('https://stackoverflow.com')
    questions = soup.select('.question-summary > div.summary > h3 > a')
    users = soup.select('.question-summary > div.summary > div.started > a:nth-of-type(2)')
    status = soup.select('.question-summary > div.summary > div.started > a.started-link')
    # Pairing is still positional, but zip() stops at the shortest list, so
    # an entry without a status or user link cannot raise IndexError.
    for number, (question, state, user) in enumerate(zip(questions, status, users), 1):
        # Single-argument print(...) works on both Python 2 and Python 3.
        print("Question {0}: {1}. {2} by {3}".format(
            number, question.text.strip(), state.text.strip(), user.text.strip()))
def get_popular_tags():
    """Print the tags listed on https://stackoverflow.com/tags.

    Each output line has the form ``Tag N: <name> X <count>``, where the
    name comes from a ``.post-tag`` element and the count from a
    ``span.item-multiplier-count`` element inside ``#tags-browser``.

    NOTE(review): the CSS selectors reflect the page markup the script was
    written against -- confirm they still match the live site.
    """
    soup = get_data('https://stackoverflow.com/tags')
    tags = soup.select('#tags-browser .post-tag')
    counts = soup.select('#tags-browser span.item-multiplier-count')
    # zip() stops at the shorter list, so a tag without a count element
    # cannot raise IndexError.
    for number, (tag, count) in enumerate(zip(tags, counts), 1):
        # Single-argument print(...) works on both Python 2 and Python 3.
        print("Tag {0}: {1} X {2}".format(number, tag.text.strip(), count.text.strip()))
def get_top_users():
    """Print the users listed on the Stack Overflow reputation tab.

    Fetches ``/users?tab=Reputation&filter=all`` and prints one line of
    the form ``User N: <name> - <reputation>`` per user link found inside
    ``#user-browser``.

    NOTE(review): the CSS selectors reflect the page markup the script was
    written against -- confirm they still match the live site.
    """
    soup = get_data('https://stackoverflow.com/users?tab=Reputation&filter=all')
    users = soup.select('#user-browser .user-details > a')
    reputation = soup.select('#user-browser .user-details .reputation-score')
    # zip() stops at the shorter list, so a user without a reputation
    # element cannot raise IndexError.
    for number, (user, score) in enumerate(zip(users, reputation), 1):
        # Single-argument print(...) works on both Python 2 and Python 3.
        print("User {0}: {1} - {2}".format(number, user.text.strip(), score.text.strip()))
def _print_ranked_users(sections, position):
    """Print the numbered user lines for ``sections[position]``.

    Each user link in the section is paired positionally with a table row
    of the same section; the text of the row's first ``td`` is printed
    next to the user name.

    Raises:
        IndexError: If the page yielded fewer sections than expected.
    """
    if position >= len(sections):
        # Same exception type as a bare out-of-range index, but with a
        # message that points at the real cause.
        raise IndexError(
            "expected at least {0} '#questions div.fl' sections, found {1}".format(
                position + 1, len(sections)))
    section = sections[position]
    users = section.select('.user-details a')
    rows = section.select('tr')
    # zip() stops at the shorter list, so a missing row cannot raise IndexError.
    for number, (user, row) in enumerate(zip(users, rows), 1):
        cells = row.select('td:nth-of-type(1)')
        # A row without any <td> gets an empty score rather than crashing.
        score = cells[0].text.strip() if cells else ''
        print("User {0}: {1} - {2}".format(number, user.text.strip(), score))


def get_top_users_of_tag():
    """Prompt for a tag name and print its top answerers and top askers.

    Reads the tag from standard input, fetches
    ``https://stackoverflow.com/tags/<tag>/topusers`` through ``get_data``
    and prints the users found in the second and fourth ``div.fl`` sections
    under ``#questions``.

    NOTE(review): the section positions and CSS selectors reflect the page
    markup the script was written against -- confirm they still match.

    Raises:
        IndexError: If the page has fewer ``div.fl`` sections than expected.
    """
    # Resolve the Python 2 / Python 3 names locally so the function runs on both.
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3
    try:
        from urllib.parse import quote  # Python 3
    except ImportError:
        from urllib import quote  # Python 2

    tag = read_line('Enter tag name ').strip()
    print("Top Answerers")
    # Percent-encode the tag: an unescaped "c#" would turn "#/topusers"
    # into a URL fragment and fetch the page for tag "c" instead.
    soup = get_data('https://stackoverflow.com/tags/' + quote(tag) + '/topusers')
    sections = soup.select('#questions div.fl')
    _print_ranked_users(sections, 1)
    print("\n\nTop Askers")
    _print_ranked_users(sections, 3)
if __name__ == '__main__':
    # Script entry point: run each report in turn under its own heading.
    # Single-argument print(...) calls produce the same output on Python 2
    # and Python 3, whereas the bare print statement is a SyntaxError on 3.
    print("Get latest questions\n\n")
    get_latest_questions()
    print("\n\nGet popular tags\n\n")
    get_popular_tags()
    print("\n\nGet top users\n\n")
    get_top_users()
    print("\n\nGet users of tag\n\n")
    get_top_users_of_tag()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment