Skip to content

Instantly share code, notes, and snippets.

@Ogreman
Last active February 24, 2016 12:09
Show Gist options
  • Save Ogreman/fb7e630fb05718a78e00 to your computer and use it in GitHub Desktop.
Save Ogreman/fb7e630fb05718a78e00 to your computer and use it in GitHub Desktop.
#! /usr/bin/python
import datetime
import functools
import json
import os

import click
import requests
# Turn off the warnings issued by urllib3 (reached here through
# requests.packages) for the whole process.
requests.packages.urllib3.disable_warnings()
# Template of the slackbot hook endpoint.  {channel} is filled in by the `say`
# command; "%23" is the URL-encoded "#" that prefixes the channel name.
# NOTE(review): [TEAM] and [SECRET] look like placeholders that must be
# replaced with real values before the script can post -- confirm.
URL = "https://[TEAM].slack.com/services/hooks/slackbot?token=[SECRET]&channel=%23{channel}"
# Running total of calls made through wrappers built by update_requests().
REQUEST_COUNTER = 0
class Config(object):
    """Container for the global command-line switches.

    Attributes are plain booleans:
      verbose -- print progress messages.
      debug   -- print extra diagnostic detail.
    """

    def __init__(self):
        # Both switches start disabled.
        self.verbose = self.debug = False
def update_requests(method):
    """Return a wrapper around *method* that counts every call.

    Each invocation of the returned callable adds one to the module-level
    ``REQUEST_COUNTER`` and then forwards all positional and keyword
    arguments to *method*, returning its result.  The counter is bumped
    before the call, so calls that raise are counted as well.

    ``functools.wraps`` copies the wrapped callable's ``__name__`` and
    ``__doc__`` onto the wrapper so that introspection, tracebacks and debug
    output keep showing the real function rather than an anonymous inner one.
    """
    @functools.wraps(method)
    def counted(*args, **kw):
        global REQUEST_COUNTER
        REQUEST_COUNTER += 1
        return method(*args, **kw)
    return counted
# Decorator that hands the shared Config object to a command as its first
# argument; ensure=True asks click to create the object if the context does
# not hold one yet.
pass_config = click.make_pass_decorator(Config, ensure=True)
# Monkey-patch requests.get with the counting wrapper so every GET issued
# through it increments REQUEST_COUNTER.  Only `get` is replaced here.
requests.get = update_requests(requests.get)
# Root command group: copies the global --verbose / --debug flags onto the
# shared Config so sub-commands can read them, and echoes both settings in
# cyan when --debug is given.
#
# Documented with comments rather than a docstring on purpose: click would
# publish a docstring as the group's --help text.
@click.group()
@click.option('--verbose', is_flag=True)
@click.option('--debug', is_flag=True)
@pass_config
def cli(config, verbose, debug):
    config.verbose, config.debug = verbose, debug
    if not debug:
        return
    reports = (
        ('Verbose set to {0}.', config.verbose),
        ('Debug set to {0}.', config.debug),
    )
    for template, value in reports:
        click.secho(template.format(value), fg="cyan")
# Post a message to a Slack channel through the slackbot hook in URL.
#
#   config  -- shared Config carrying the --verbose / --debug flags.
#   channel -- channel name substituted into URL's {channel} placeholder.
#   message -- tuple of words (nargs=-1); joined with single spaces to form
#              the request body.
#
# Raises click.ClickException when no message words were supplied.
# Request failures are reported in red instead of raising.
#
# Documented with comments rather than a docstring on purpose: click would
# publish a docstring as the command's --help text.
@cli.command()
@click.argument('channel')
@click.argument('message', nargs=-1)
@pass_config
def say(config, channel, message):
    if not message:
        raise click.ClickException("Missing message.")
    if config.verbose:
        click.echo(
            "Sending message to {channel}..."
            .format(channel=channel)
        )
    try:
        response = requests.post(
            URL.format(channel=channel),
            data=' '.join(message)
        )
    # RequestException is the base class of ConnectionError, so the case that
    # was handled before is still handled; other transport failures (invalid
    # URL, redirect loops, ...) are now reported the same way instead of
    # escaping as a traceback.
    except requests.exceptions.RequestException as e:
        click.secho("Failed to post to channel!", fg="red")
        if config.debug:
            # Same debug convention as the `links` command: show the cause.
            click.secho(str(e), fg="red")
    else:
        if response.ok:
            click.secho("Sent!", fg="green")
        else:
            click.secho(
                "[{}] Something went wrong..."
                .format(response.status_code),
                fg="red"
            )
            if config.debug:
                # Dump the server's reply to help diagnose the rejection.
                click.secho(response.content, fg="red")
    if config.verbose:
        click.echo("Done.")
# Scrape links (or Bandcamp album ids) from the JSON files of an export
# directory that holds one sub-directory per channel.
#
#   config       -- shared Config carrying the --verbose / --debug flags.
#   scraper      -- 'urltext', 'urlatts' or 'bcids'; any other value falls
#                   back to scrape_links_from_attachments.
#   directory    -- root of the export; may be relative or absolute.
#   channel      -- name of the single sub-directory to scrape.  When omitted
#                   the user must confirm scraping every sub-directory
#                   (declining aborts the command).
#   dump_to_file -- write the results as JSON to "<ISO timestamp>.doom" in the
#                   directory the command was started from, instead of
#                   echoing one result per line.
#
# File-system errors are reported in red rather than raised.
#
# Documented with comments rather than a docstring on purpose: click would
# publish a docstring as the command's --help text.
@cli.command()
@click.argument('scraper')
@click.argument('directory')
@click.option('--channel', default=None)
@click.option('--dump-to-file', is_flag=True)
@pass_config
def links(config, scraper, directory, channel, dump_to_file=False):
    scraper_function = {
        'urltext': scrape_links_from_text,
        'urlatts': scrape_links_from_attachments,
        'bcids': scrape_bandcamp_album_ids,
    }.get(scraper, scrape_links_from_attachments)
    if config.verbose:
        # __name__ exists on both Python 2 and 3; func_name is Python 2 only.
        click.echo("Using %s to scrape..." % scraper_function.__name__)
    if channel is None:
        scrape_all = click.confirm("Scrape all?", abort=True)
    else:
        scrape_all = False
    if config.verbose:
        click.echo(
            "Scraping all links from {channel}..."
            .format(channel=channel if channel else "all channels")
        )
    prev_dir = os.path.abspath(os.path.curdir)
    # Resolve before chdir: a relative path would otherwise be looked up a
    # second time from inside the directory itself and not be found.
    directory = os.path.abspath(directory)
    try:
        os.chdir(directory)
        if scrape_all:
            channels = [
                dir_name
                for dir_name in os.listdir(directory)
                if os.path.isdir(os.path.join(directory, dir_name))
            ]
        else:
            channels = [channel]
        # Join each file with the directory os.walk found it in, so files in
        # nested sub-directories resolve to their real location.
        found_links = [
            link
            for channel_name in channels
            for dir_path, _, files in os.walk(channel_name)
            for file_name in files
            for link in scraper_function(os.path.join(dir_path, file_name))
        ]
        if config.verbose:
            click.echo("Scraped %d links" % len(found_links))
    except (IOError, OSError) as e:
        click.secho("Failed to open a file!", fg="red")
        if config.debug:
            click.secho(str(e), fg="red")
    else:
        if dump_to_file:
            # Absolute path, so the dump lands where the command was started
            # even though the working directory is still `directory` here.
            doom_path = os.path.join(
                prev_dir,
                datetime.datetime.today().isoformat() + '.doom'
            )
            try:
                with open(doom_path, 'w') as fh:
                    fh.write(json.dumps(found_links, indent=4))
            except (IOError, OSError) as e:
                click.secho("Failed to write to file!", fg="red")
                if config.debug:
                    click.secho(str(e), fg="red")
            else:
                if config.verbose:
                    click.echo("Wrote to %s" % doom_path)
        else:
            for link in found_links:
                click.echo(link)
    finally:
        # Always restore the caller's working directory, including when an
        # exception that is not handled above propagates.
        os.chdir(prev_dir)
    if config.verbose:
        click.echo("Performed %d requests." % REQUEST_COUNTER)
        click.echo("Done.")
class NotFoundError(Exception):
    """Raised when no Bandcamp album id can be obtained for a message."""
def scrape_links_from_attachments(file_name):
    """Yield the ``from_url`` of each attachment stored in a JSON file.

    The file is parsed as JSON and iterated; only entries whose ``type`` is
    ``'message'`` are considered.  Attachments without a ``from_url`` key are
    skipped, as are messages without an ``attachments`` key.

    file_name -- path of the JSON file to read.
    """
    with open(file_name) as source:
        entries = json.load(source)
    for entry in entries:
        if entry.get('type') != 'message':
            continue
        for attachment in entry.get('attachments', []):
            try:
                url = attachment['from_url']
            except KeyError:
                continue
            yield url
def scrape_links_from_text(file_name):
    """Yield the full text of each message that contains ``'http'``.

    The file is parsed as JSON and iterated; only entries whose ``type`` is
    ``'message'`` are considered.  The whole ``text`` value is yielded, not
    just the URL inside it.  A ``text`` value that does not support the
    ``in`` test (for example ``null``) is skipped.

    file_name -- path of the JSON file to read.
    """
    with open(file_name) as source:
        entries = json.load(source)
    for entry in entries:
        if entry.get('type') != 'message':
            continue
        body = entry.get('text', '')
        try:
            has_link = 'http' in body
        except TypeError:
            # Non-container text (e.g. None): nothing to search.
            continue
        if has_link:
            yield body
def scrape_bandcamp_album_ids_from_attachments(message):
    """Yield the album id found in each Bandcamp attachment of *message*.

    An attachment is considered when its ``from_url`` contains
    ``'bandcamp.com'``.  The id is the text between ``'album='`` and the next
    ``'/'`` in the attachment's ``audio_html``.  Attachments lacking either
    key, or whose ``audio_html`` does not contain ``'album='`` exactly once,
    are skipped.

    message -- mapping with an ``'attachments'`` sequence; a missing key
               raises KeyError when iteration starts.
    """
    for attachment in message['attachments']:
        try:
            if 'bandcamp.com' not in attachment['from_url']:
                continue
            # Strings are immutable: the stripped copy has to be kept,
            # otherwise escaped slashes ("\/") leave a stray backslash at the
            # end of the extracted id.
            html = attachment['audio_html'].replace('\\', '')
            # Unpacking raises ValueError unless 'album=' occurs exactly once.
            _, seg = html.split('album=')
        except (ValueError, KeyError):
            continue
        # partition() returns the whole remainder when there is no '/';
        # slicing with find() == -1 would silently drop the last character.
        yield seg.partition('/')[0]
def scrape_bandcamp_album_ids_from_urls(message):
    """Fetch the Bandcamp page linked in *message* and return its album id.

    The message text must contain ``'http'``, ``'bandcamp.com'`` and
    ``'album'``.  The first ``<http...>`` link in the text is fetched with
    ``requests.get`` (any ``|label`` suffix and backslashes are removed
    first).  The id is read from the ``<!-- album id N -->`` comment in the
    page body.

    message -- mapping with a ``'text'`` entry; a missing key raises KeyError.

    Returns the album id as a stripped string.
    Raises NotFoundError when the text has no usable link, the response is
    not OK, or the page has no album-id comment.  Exceptions raised by
    ``requests.get`` itself are not caught here.
    """
    comment = '<!-- album id '
    comment_len = len(comment)
    text = message['text']
    if 'http' in text and 'bandcamp.com' in text and 'album' in text:
        start = text.find('<http')
        # Look for the closing bracket only after the link opens; an earlier
        # '>' (e.g. from a "<@user>" mention) would otherwise yield an empty
        # or garbled URL.
        end = text.find('>', start) if start != -1 else -1
        if end == -1:
            # No bracketed link to fetch.
            raise NotFoundError
        url = text[start + 1:end].replace('\\', '').split('|')[0]
        response = requests.get(url)
        if response.ok:
            content = response.text
            pos = content.find(comment)
            if pos != -1:
                # The id and its closing '-->' are expected within the 20
                # characters that follow the comment prefix.
                id_start = pos + comment_len
                album_id = content[id_start:id_start + 20]
                return album_id.split('-->')[0].strip()
    raise NotFoundError
def scrape_bandcamp_album_ids(file_name, do_requests=True):
    """Yield Bandcamp album ids for the messages stored in a JSON file.

    Only entries whose ``type`` is ``'message'`` are considered.  A message
    that has an ``attachments`` key is handled by
    ``scrape_bandcamp_album_ids_from_attachments``.  Any other message is
    handled by ``scrape_bandcamp_album_ids_from_urls``, but only when
    *do_requests* is true; a ValueError, KeyError or NotFoundError from that
    lookup skips the message.

    file_name   -- path of the JSON file to read.
    do_requests -- allow the URL-based lookup for messages without
                   attachments.
    """
    with open(file_name) as source:
        entries = json.load(source)
    for entry in entries:
        if entry.get('type') != 'message':
            continue
        if 'attachments' in entry:
            for found in scrape_bandcamp_album_ids_from_attachments(entry):
                yield found
            continue
        if not do_requests:
            continue
        try:
            found = scrape_bandcamp_album_ids_from_urls(entry)
        except (ValueError, KeyError, NotFoundError):
            continue
        yield found
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment