Last active
August 8, 2022 04:17
-
-
Save tianchaijz/cb30e71d6f1fb85e954bca8454942c87 to your computer and use it in GitHub Desktop.
A python script to clone a specified github user's repos, gists.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
import os | |
import re | |
import time | |
import json | |
import logging | |
import requests | |
import multiprocessing.dummy | |
from subprocess import PIPE, Popen | |
API = 'api.github.com' | |
USER = None | |
TOKEN = os.environ.get('GITHUB_TOKEN') | |
KIND = None | |
DEBUG = None | |
ERR = None | |
LIMIT = 16 # M | |
POOL = multiprocessing.dummy.Pool(5) | |
DEFAULT_FMT = '%(levelname)-8s [%(asctime)s] %(name)s: %(message)s' | |
class Formatter(logging.Formatter): | |
def __init__(self, datefmt=None): | |
logging.Formatter.__init__(self, DEFAULT_FMT, datefmt) | |
self.converter = time.gmtime | |
def formatException(self, exc_info): | |
text = logging.Formatter.formatException(self, exc_info) | |
text = '\n'.join(('! %s' % line) for line in text.splitlines()) | |
return text | |
def get_console_logger(name): | |
ch = logging.StreamHandler() | |
ch.setLevel(logging.DEBUG) | |
ch.setFormatter(Formatter()) | |
logger = logging.getLogger(name) | |
logger.setLevel(logging.DEBUG) | |
logger.addHandler(ch) | |
return logger | |
def shell(command, stdin=None): | |
process = Popen( | |
args=command, | |
stdout=PIPE, | |
stderr=PIPE, | |
stdin=stdin, | |
shell=True, | |
close_fds=True | |
) | |
std = process.communicate() | |
if std[0]: | |
DEBUG(std[0]) | |
if std[1]: | |
ERR(std[1]) | |
process.wait() | |
def parse_options(): | |
global TOKEN, USER, KIND, LIMIT | |
from optparse import OptionParser | |
parser = OptionParser() | |
parser.add_option('-t', dest='token', action='store', type='string', | |
default=TOKEN) | |
parser.add_option('-u', dest='user', action='store', type='string', | |
help='specify the config file') | |
parser.add_option('-k', dest='kind', action='store', type='string', | |
help='specify the kind(gists, repos)') | |
parser.add_option('--limit', dest='limit', action='store', type='int', | |
help='specify the limit size of clone', default=LIMIT) | |
options, _ = parser.parse_args() | |
assert options.token, 'token required' | |
assert options.user, 'user required' | |
assert options.kind, 'kind required' | |
USER, TOKEN, KIND = options.user, options.token, options.kind | |
if options.limit != LIMIT: | |
LIMIT = options.limit | |
def get_headers(): | |
headers = { | |
'Host': API, | |
'Accept': 'application/vnd.github.v3+json', | |
'Authorization': 'token ' + TOKEN, | |
'User-Agent': 'tianchaijz/clone-it' | |
} | |
return headers | |
def do_request(page): | |
url = 'https://%s/users/%s/%s?page=%d' % (API, USER, KIND, page) | |
r = requests.get(url, headers=get_headers()) | |
return r | |
def shell_clone(url): | |
DEBUG('cloning: %s' % url) | |
path = re.sub(r'\.git$', '', url).split('/')[-1] | |
if os.path.isdir(path): | |
ERR('directory exists: %s, pull now' % (url)) | |
shell('cd %s && git pull --all' % path) | |
return | |
shell('git clone %s' % url) | |
class Clone(object): | |
@staticmethod | |
def gists(gist): | |
shell_clone(gist['git_pull_url']) | |
@staticmethod | |
def repos(repo): | |
url = repo['git_url'] | |
lang = os.environ.get('GITHUB_LANG', '').strip() | |
if lang: | |
lang = lang.lower().split(',') | |
repo_lang = str(repo['language']) | |
if repo_lang.lower() not in lang: | |
DEBUG('skip: %s, language: %s' % (url, repo_lang)) | |
return | |
if repo['size'] / 1024 > LIMIT: | |
DEBUG('skip: %s, size: %d' % (url, repo['size'])) | |
return | |
shell_clone(url) | |
def clone(tasks): | |
POOL.map(getattr(Clone, KIND), tasks) | |
def fetch(page=0): | |
page += 1 | |
r = do_request(page) | |
if r is None: | |
return | |
DEBUG('get page: %d, status: %d, content length: %d' % ( | |
page, r.status_code, len(r.content))) | |
tasks = json.loads(r.content) | |
if tasks: | |
clone(tasks) | |
fetch(page) | |
def main(): | |
global DEBUG, ERR | |
logger = get_console_logger('git/clone-it') | |
DEBUG = lambda s: logger.log(logging.DEBUG, s) | |
ERR = lambda s: logger.log(logging.ERROR, s) | |
parse_options() | |
fetch() | |
if __name__ == '__main__': | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
What does
kind
mean? What are the possible values of it? I just want to download all gists.