The main Python script for running the bot
# importing modules
import feedparser
from bs4 import BeautifulSoup
import tweepy
import os
import logging
import datetime
# url for collecting news
url = "http://news.google.com/news?q=computational+chemistry&hl=en-US&sort=date&gl=US&num=100&output=rss"
# logging config
logging.basicConfig(level=logging.INFO, filename='data.txt')
logger = logging.getLogger()
# Keys
CONSUMER_KEY = os.environ['Consumer_Key']
CONSUMER_SECRET_KEY = os.environ['Consumer_Secret_Key']
ACCESS_TOKEN = os.environ['Access_Token']
ACCESS_TOKEN_SECRET = os.environ['Access_Token_Secret']

# Authentication
auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET_KEY)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
api = tweepy.API(auth)
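
# Optional sanity check (a sketch added for debugging, not part of the original
# bot logic): in tweepy v3.x, verify_credentials() returns the authenticated
# user on success and may return False or raise on bad keys.
try:
    if api.verify_credentials():
        logger.info("Twitter credentials verified")
    else:
        logger.error("Twitter credential check failed")
except Exception as e:
    logger.error(f"Twitter credential check failed: {e}")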

class ParseFeed():
    def __init__(self, url):
        self.feed_url = url

    def clean(self, html):
        '''
        Get the text from the HTML and do some cleaning
        '''
        # an explicit parser avoids bs4's "no parser specified" warning
        soup = BeautifulSoup(html, "html.parser")
        text = soup.get_text()
        text = text.replace('\xa0', ' ')
        return text

    def follow_back(self, api):
        '''
        Follow back the users who retweeted the latest tweet
        '''
        # getting the retweeters of the latest tweet
        latest_tweet_id = api.user_timeline(count=1)[0].id
        retweets_list = api.retweets(latest_tweet_id)
        retweeter_ids = [retweet.user.id for retweet in retweets_list]
        # getting the ids of the accounts already followed
        friend_ids = [friend.id for friend in api.friends(screen_name="CompChemNewsBot")]
        # keeping only the retweeters who are not followed yet
        final_list = [elem for elem in retweeter_ids if elem not in friend_ids]
        # in case there is no new user to follow
        if final_list == []:
            logger.info(f"No new user found at: {datetime.datetime.now()}\n\t\t\t\t\t\t******\t\t\t\t\t\t")
        # new users found: follow them back
        else:
            for user in final_list:
                try:
                    api.create_friendship(user_id=user)
                    logger.info(f"{user} UserID followed back at: {datetime.datetime.now()}\n\t\t\t\t\t\t******\t\t\t\t\t\t")
                except Exception as e:
                    logger.info(f"{user} can't be followed back at: {datetime.datetime.now()} due to {e} error\n\t\t\t\t\t\t******\t\t\t\t\t\t")

    def tweet(self, text_list, url_list):
        '''
        Post the first news item of the list to Twitter and log the data
        (one could post a random item instead; see the note after this method)
        '''
        # dropping news that has already been tweeted
        file_name = "tweets.txt"
        with open(file_name, "r") as file:
            old_tweets = [line.strip() for line in file.readlines()]
        fresh = [(text, url) for text, url in zip(text_list, url_list)
                 if text not in old_tweets]
        # tweeting the first item that goes through; on failure, try the next one
        for text, url in fresh:
            try:
                api.update_status(
                    f"@aritraroy24 #compchem #news #update #science #chemistry #quantum\nTODAY'S UPDATE: {text}\n{url}")
                # recording the tweet only after it has actually been posted
                with open(file_name, "a") as file:
                    file.write(f"{text}\n\n")
                logger.info(f"Tweet done at: {datetime.datetime.now()} === {text} === {url}\n\n\n")
                break
            except Exception as e:
                logger.info(f"Tweet can't be done at: {datetime.datetime.now()} due to {e} error\n\n\n")

    def parse(self):
        '''
        Parse the feed URL and collect the descriptions and links of the news
        items whose non-space character count is under 150, so the final tweet
        stays within the length limit
        '''
        text_list = []
        url_list = []
        feeds = feedparser.parse(self.feed_url).entries
        for f in feeds:
            text = self.clean(f.get("description"))
            url = f.get("link")
            # counting the non-space characters of the description
            count = sum(ch != ' ' for ch in text)
            if count < 150:
                text_list.append(text)
                url_list.append(url)
        self.follow_back(api)
        self.tweet(text_list, url_list)

feed = ParseFeed(url)
feed.parse()
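
To run this locally, the four environment variables read at the top must exist before the script starts, and a tweets.txt file must be present, since tweet() opens it for reading. A minimal launcher sketch, assuming the script above is saved as bot.py (a hypothetical name) and using placeholder key values:

import os

# placeholder values; the variable names match the os.environ lookups in the script
os.environ["Consumer_Key"] = "<consumer key>"
os.environ["Consumer_Secret_Key"] = "<consumer secret>"
os.environ["Access_Token"] = "<access token>"
os.environ["Access_Token_Secret"] = "<access token secret>"

# the bot reads tweets.txt on every run; create it once if it does not exist
open("tweets.txt", "a").close()

import bot  # importing bot.py executes feed.parse() at the bottom of the script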