Last active
March 1, 2018 18:29
-
-
Save alexritter96/042292001c9d2d93d1066837c7041dba to your computer and use it in GitHub Desktop.
dumb reddit scraping
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import praw | |
# Module-level PRAW client shared by every function in this script.
# All credential fields are blank placeholders here and have to be filled
# in before the script can talk to the Reddit API.
reddit = praw.Reddit(
    client_id='',
    client_secret='',
    password='',
    user_agent='',
    username='',
)
def get_comment_body():
    """Append the comment bodies of the hot threads of ``sub`` to a text file.

    Reads the module-level ``sub`` (subreddit name) and ``reddit`` (PRAW
    client). For every thread id returned by ``get_subreddit_id(sub)`` the
    comment forest is flattened and each comment body is appended to
    ``subreddits.txt``, one body per write followed by a newline.

    Returns:
        None. The only effect is the data appended to ``subreddits.txt``.
    """
    # Fetch the ids first so a failure here does not touch the output file.
    threads = get_subreddit_id(sub)

    # Open the output once rather than once per comment, and pin UTF-8:
    # comment bodies routinely contain characters (emoji, non-Latin text)
    # that a platform-default encoding may not be able to encode.
    with open('subreddits.txt', 'a', encoding='utf-8') as outfile:
        for thread in threads:
            submission = reddit.submission(id=thread)
            # Strip the "load more comments" placeholders so that list()
            # below yields only real comments that have a ``body``.
            submission.comments.replace_more(limit=0)
            for comment in submission.comments.list():
                outfile.write(comment.body)
                # Terminate each body so consecutive comments do not run
                # together into one undelimited blob.
                outfile.write('\n')
def get_subreddit_id(sub):
    """Return the ids of up to 25 submissions from a subreddit's hot listing.

    Args:
        sub: Subreddit name handed to ``reddit.subreddit``.

    Returns:
        A list with the ``id`` attribute of each submission, in the order
        the hot listing yields them.
    """
    hot_listing = reddit.subreddit(sub).hot(limit=25)
    return [post.id for post in hot_listing]
def get_user_comments(redditor, tfile=False):
    """Append a redditor's comments (newest first) to a local file.

    Args:
        redditor: Username handed to ``reddit.redditor``.
        tfile: When true, append the raw comment bodies to
            ``usercomments.txt``, each followed by a newline. Otherwise
            append one JSON object ``{comment id: comment body}`` per line
            to ``comment.json``.

    Returns:
        None. The only effect is the data appended to the chosen file.
    """
    path = 'usercomments.txt' if tfile else 'comment.json'

    # One open for the whole run instead of one per comment; UTF-8 is set
    # explicitly so non-ASCII comment text cannot trip the platform default.
    with open(path, 'a', encoding='utf-8') as outfile:
        for comment in reddit.redditor(redditor).comments.new(limit=None):
            if tfile:
                outfile.write(comment.body)
            else:
                json.dump({comment.id: comment.body}, outfile)
            # Newline-terminate every record. For the JSON branch this
            # yields JSON Lines; back-to-back objects with no separator
            # ("{...}{...}") cannot be loaded by a JSON parser.
            outfile.write('\n')
def subreddit_to_json():
    """Append the comments of the hot threads of ``sub`` to ``data.json``.

    Reads the module-level ``sub`` (subreddit name) and ``reddit`` (PRAW
    client). For every thread id returned by ``get_subreddit_id(sub)`` a
    list of ``{comment id: comment body}`` objects is built and written as
    one JSON array on its own line.

    Returns:
        None. The only effect is the data appended to ``data.json``.
    """
    # Fetch the ids first so a failure here does not touch the output file.
    threads = get_subreddit_id(sub)

    # Open once for all threads instead of reopening the file per thread.
    with open('data.json', 'a', encoding='utf-8') as outfile:
        for thread in threads:
            submission = reddit.submission(id=thread)
            # Strip the "load more comments" placeholders so that list()
            # below yields only real comments that have a ``body``.
            submission.comments.replace_more(limit=0)
            data = [
                {comment.id: comment.body}
                for comment in submission.comments.list()
            ]
            json.dump(data, outfile)
            # One array per line (JSON Lines): arrays appended back to back
            # ("[...][...]") would not be parseable as JSON.
            outfile.write('\n')
# Script configuration. ``sub`` is read as a module-level global by
# get_comment_body() and subreddit_to_json(); ``name`` is the redditor
# whose comments are fetched.
sub = 'learnpython'
name = 'danceprometheus'

# Run the three scrapes in sequence. Each one appends to its own output
# file: subreddits.txt, comment.json (tfile defaults to False) and
# data.json respectively.
get_comment_body()
get_user_comments(name)
subreddit_to_json()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment