Created
June 11, 2016 18:41
-
-
Save saintbyte/8774d01a28cc3e04bbec9544266ecdbd to your computer and use it in GitHub Desktop.
habraproxy.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
from __future__ import unicode_literals | |
import urllib | |
import SimpleHTTPServer | |
import SocketServer | |
import sys | |
import re | |
import optparse | |
from bs4 import BeautifulSoup | |
# When true, debug() prints its argument; when false, debug() is silent.
DEBUG = True
# TCP port the proxy server binds to in main().
PORT = 1111
class Proxy(SimpleHTTPServer.SimpleHTTPRequestHandler):
    """Request handler that relays GET requests and marks six-character words.

    ``do_GET`` treats ``self.path`` as the URL to fetch, retrieves it with
    ``urllib.urlopen`` and relays the upstream status, headers and body to
    the client.  When the upstream Content-Type is listed in
    ``fix_content_types`` the body is passed through ``fix_content`` first.
    """

    # Media types whose bodies are rewritten by fix_content().
    fix_content_types = ['text/html']
    # Text located directly inside these tags is left untouched.
    ignore_tags = ['script', 'style', 'img']
    # Upstream headers that are not relayed: the body is always sent as one
    # block with a freshly computed Content-Length (see do_GET).
    skip_headers = ['content-length', 'transfer-encoding']
    # Runs of exactly six non-space characters delimited by word boundaries.
    # The flags must be OR-ed together; AND-ing them yields 0 (no flags).
    # NOTE(review): \S also matches punctuation, so tokens such as "ab-cde"
    # are marked as well -- confirm whether \w{6} is what is really wanted.
    word_re = re.compile(r"(\b\S{6}\b)", flags=re.UNICODE | re.MULTILINE)
    # Replacement template: the matched word followed by a trademark sign.
    word_repl = r"\1%s" % "™"

    def do_GET(self):
        """Fetch ``self.path`` upstream and relay the response to the client.

        Returns:
            ``False`` when the request was rejected or the upstream fetch
            failed (an error response has been sent), otherwise ``None``.
        """
        print("self.path: {}".format(self.path))
        # urllib.urlopen() also opens local files when given a path without
        # a scheme, so only absolute http(s) URLs are accepted.
        if not self.path.lower().startswith(("http://", "https://")):
            self.send_error(400)
            return False
        try:
            response = urllib.urlopen(self.path)  # TODO: copy the request method
        except Exception as err:
            # Network errors happen; report them to the client instead of
            # dropping the connection without any response.
            debug("upstream error: {!r}".format(err))
            self.send_error(502)
            return False
        try:
            need_fix = False
            headers = []
            # Iterate the raw header lines so repeated headers are relayed
            # one by one.
            for header in response.info().headers:
                if header[:1] in " \t":
                    # Continuation line of a folded header (or an empty line).
                    continue
                name, sep, value = header.partition(":")
                if not sep:
                    continue  # Not a "Name: value" line.
                name = name.strip()
                value = value.strip()
                debug("{}={}".format(name, value))
                lowered = name.lower()
                if lowered in self.skip_headers:
                    continue
                headers.append((name, value))
                if lowered == 'content-type':
                    # Drop any parameters such as "; charset=utf-8".
                    media_type = value.split(";", 1)[0].strip().lower()
                    if media_type in self.fix_content_types:
                        need_fix = True
            remote_data = response.read()
            status = response.getcode()
        finally:
            response.close()
        if need_fix:
            remote_data = self.fix_content(remote_data)
        # Copy the upstream status and headers, then send the (possibly
        # rewritten) body with a length that matches what is written.
        self.send_response(status)
        for name, value in headers:
            self.send_header(name, value)
        self.send_header('Content-Length', str(len(remote_data)))
        self.end_headers()
        self.wfile.write(remote_data)

    def fix_content(self, data):
        """Append a trademark sign to every match of ``word_re`` in HTML text.

        The document is parsed with BeautifulSoup's ``html.parser``.  Plain
        text nodes (not comments, doctypes and similar) whose parent tag is
        not in ``ignore_tags`` are rewritten in the parse tree, and the tree
        is serialised back to bytes.

        Args:
            data: the HTML document as received from upstream.

        Returns:
            ``data`` itself when nothing matched, otherwise the re-serialised
            document encoded with the encoding BeautifulSoup detected
            (UTF-8 when none was detected).
        """
        # Imported locally because the module only imports BeautifulSoup.
        from bs4 import NavigableString

        soup = BeautifulSoup(data, "html.parser")
        changed = False
        # find_all() returns a list, so replacing nodes while looping is safe.
        for node in soup.find_all(text=True):
            # Exact type check on purpose: Comment, Doctype, CData and the
            # like are NavigableString subclasses and must not be touched.
            if type(node) is not NavigableString:
                continue
            parent = node.parent
            if parent is not None and parent.name in self.ignore_tags:
                continue
            if node.strip() == "":
                continue
            fix_str = self.word_re.sub(self.word_repl, node)
            if fix_str == node:
                continue
            # Encode before printing so a non-UTF-8 stdout cannot raise.
            debug(("tag.string:" + node).encode("utf-8"))
            debug(("fix_str:" + fix_str).encode("utf-8"))
            node.replace_with(fix_str)
            changed = True
        if not changed:
            return data
        return soup.encode(soup.original_encoding or "utf-8")
def debug(s):
    """Print ``s`` to standard output, but only while ``DEBUG`` is truthy."""
    if not DEBUG:
        return
    print(s)
def shutdown():
    """Print a farewell message and terminate the process.

    Raises:
        SystemExit: always, carrying the default (success) exit status.
    """
    print("Quit...")
    # sys.exit() rather than quit(): quit() is installed by the ``site``
    # module for interactive sessions and is missing under ``python -S``.
    sys.exit()
def main():
    """Start a forking TCP server with the Proxy handler and serve requests.

    The server binds to all interfaces on ``PORT``.  If it cannot be created,
    the error is printed and ``shutdown()`` is called.  Ctrl-C stops the
    serving loop, closes the listening socket and calls ``shutdown()``.
    """
    print('Starting...')
    try:
        httpd = SocketServer.ForkingTCPServer(('', PORT), Proxy)
    except Exception as err:
        # Not a bare except: KeyboardInterrupt/SystemExit must propagate.
        print("Cant start: {}".format(err))
        shutdown()
        return  # Only reached if shutdown() ever returns; httpd is unset.
    print("serving at port {}".format(PORT))
    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop the proxy; no traceback needed.
        pass
    finally:
        httpd.server_close()
    shutdown()
# Run the proxy only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment