Last active
November 26, 2018 05:28
-
-
Save ids1024/805491118a5cad3e77bc1b83e9af909b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import os | |
import sys | |
import subprocess | |
import argparse | |
from html.parser import HTMLParser | |
from urllib.request import urlopen | |
from pycparser import c_ast, parse_file | |
# Root of the musl cgit web interface; cgit_download() appends '/plain/<path>' to it.
CGIT_URL = "https://git.musl-libc.org/cgit/musl"
# Header that parse_c() force-includes (via '-include') before every parsed file.
FAKE_TYPEDEF_URL = "https://raw.githubusercontent.com/eliben/pycparser/master/utils/fake_libc_include/_fake_typedefs.h"

# Command-line switches, all boolean and off by default:
#   --ignore-const  read by equivalent_type()
#   --array-as-ptr  read by parseType() for sized arrays
#   --drop-struct   read by parseType() for struct types
parser = argparse.ArgumentParser()
for _flag in ('--ignore-const', '--array-as-ptr', '--drop-struct'):
    parser.add_argument(_flag, action='store_true')
args = parser.parse_args()
class UrlExtractor(HTMLParser):
    """HTML parser that records the text found inside ``<a>`` elements.

    The ``href`` attribute is never inspected: ``urls`` receives the character
    data reported while the parser is between an ``<a>`` start tag and its
    matching end tag.
    """

    # True while the parser is inside an <a> element.
    a = False

    def __init__(self):
        super().__init__()
        self.urls = []

    def _set_anchor_state(self, tag, inside):
        """Update the in-anchor flag, ignoring every tag other than ``a``."""
        if tag != 'a':
            return
        self.a = inside

    def handle_starttag(self, tag, _attrs):
        self._set_anchor_state(tag, True)

    def handle_endtag(self, tag):
        self._set_anchor_state(tag, False)

    def handle_data(self, data):
        # Text outside of anchors is of no interest.
        if not self.a:
            return
        self.urls.append(data)
def cgit_download(cgit_url, path, local_path):
    """Recursively copy ``path`` from a cgit repository to ``local_path``.

    The resource ``cgit_url + '/plain/' + path`` is fetched. A response whose
    Content-Type contains ``text/html`` is treated as a directory listing:
    ``local_path`` is created and every anchor text in the page except
    ``'../'`` is downloaded beneath it. Any other response is written
    verbatim to ``local_path``.

    Progress messages go to standard error.
    """
    url = cgit_url + '/plain/' + path
    # Close the connection as soon as the payload has been read.
    with urlopen(url) as response:
        # A missing header is treated as "not HTML" instead of raising TypeError.
        content_type = response.info().get('Content-Type') or ''
        body = response.read()

    if 'text/html' not in content_type:
        sys.stderr.write(f"Downloading file '{local_path}'\n")
        with open(local_path, 'wb') as local_file:
            local_file.write(body)
        return

    # NOTE(review): any HTML response is assumed to be a directory index; a
    # repository file that is itself served as text/html would be misread.
    extractor = UrlExtractor()
    extractor.feed(body.decode())
    sys.stderr.write(f"Creating directory '{local_path}'\n")
    os.makedirs(local_path, exist_ok=True)
    for entry in extractor.urls:
        if entry != '../':
            cgit_download(cgit_url, path + '/' + entry, local_path + '/' + entry)
def parseType(node):
    """Render a pycparser declaration/type node as a C-like type string.

    Handles ``TypeDecl`` (identifier and struct types), ``PtrDecl`` (including
    pointers to functions), ``Typename``, ``ArrayDecl``, ``Decl`` and
    ``EllipsisParam``. The module-level ``args`` controls two renderings:
    ``args.drop_struct`` omits the ``struct`` keyword and ``args.array_as_ptr``
    renders sized arrays as pointers.

    Exits the program via ``sys.exit`` for node classes, or array dimensions,
    that are not handled.
    """
    if isinstance(node, c_ast.TypeDecl):
        base = node.type
        if isinstance(base, c_ast.IdentifierType):
            return ' '.join(node.quals + base.names)
        if isinstance(base, c_ast.Struct):
            if args.drop_struct:
                return ' '.join(node.quals + [f'{base.name}'])
            return ' '.join(node.quals + [f'struct {base.name}'])
        # XXX any other base type is rendered as an empty string for now
        return ''
    elif isinstance(node, c_ast.PtrDecl):
        ptr = '*'
        if node.quals:
            ptr += ' ' + ' '.join(node.quals)
        if isinstance(node.type, c_ast.FuncDecl):
            func = node.type
            # A declarator with empty parentheses has no parameter list at all.
            params = func.args.params if func.args is not None else []
            return '(' + parseType(func.type) + ')(' + ptr + ')(' + ', '.join(map(parseType, params)) + ')'
        return parseType(node.type) + ' ' + ptr
    elif isinstance(node, c_ast.Typename):
        return parseType(node.type)
    elif isinstance(node, c_ast.ArrayDecl):
        if node.dim is None or args.array_as_ptr:
            # Array is equivalent to a pointer
            return parseType(node.type) + ' *'
        if isinstance(node.dim, c_ast.Constant):
            return parseType(node.type) + f" [{node.dim.value}]"
        sys.exit(f"Class '{type(node.dim)}' not handled.")
    elif isinstance(node, c_ast.Decl):
        return ' '.join(node.quals + [parseType(node.type)])
    elif isinstance(node, c_ast.EllipsisParam):
        return '...'
    else:
        sys.exit(f"Class '{type(node)}' not handled.")
def get_name(node):
    """Return the ``declname`` found beneath any function/pointer wrappers.

    ``FuncDecl`` and ``PtrDecl`` nodes are unwrapped through their ``type``
    attribute until another kind of node is reached; that node's ``declname``
    is returned.
    """
    while isinstance(node, (c_ast.FuncDecl, c_ast.PtrDecl)):
        node = node.type
    return node.declname
def parse_c(include_dirs, path):
    """Parse a C header and return the function prototypes it declares.

    ``path`` is preprocessed and parsed with pycparser. Every directory in
    ``include_dirs`` is passed to the preprocessor with ``-isystem``, and
    ``_fake_typedefs.h`` (resolved relative to the working directory) is
    force-included.

    Returns a dict mapping each function name to a tuple
    ``(return_type, [parameter_type, ...])`` of strings produced by
    ``parseType``.
    """
    # -undef?
    cpp_args = [
        '-nostdinc',
        '-U__GNUC__',
        '-include', '_fake_typedefs.h',
        '-D_Noreturn=',
    ]
    for directory in include_dirs:
        cpp_args += ['-isystem', directory]

    ast = parse_file(path, use_cpp=True, cpp_args=cpp_args)

    fns = {}
    for _, decl in ast.children():
        # Only top-level function declarations are of interest.
        if not isinstance(decl, c_ast.Decl):
            continue
        func = decl.type
        if not isinstance(func, c_ast.FuncDecl):
            continue
        # A prototype written with empty parentheses carries no parameter list.
        params = func.args.params if func.args is not None else []
        fns[get_name(func)] = (parseType(func.type), [parseType(p) for p in params])
    return fns
# Download the musl headers, unless a 'musl_include' directory already exists.
if not os.path.exists("musl_include"):
    cgit_download(CGIT_URL, "include", "musl_include")
    cgit_download(CGIT_URL, "arch/generic", "musl_include")
    cgit_download(CGIT_URL, "arch/x86_64", "musl_include")

    # Build bits/alltypes.h from the two templates by substituting the
    # TYPEDEF / STRUCT placeholders with the real keywords.
    with open('musl_include/bits/alltypes.h', 'w') as outfile:
        with open('musl_include/bits/alltypes.h.in') as infile:
            outfile.write(infile.read().replace('TYPEDEF', 'typedef'))
        outfile.write('\n')
        with open('musl_include/alltypes.h.in') as infile:
            outfile.write(infile.read().replace('TYPEDEF', 'typedef')
                                       .replace('STRUCT', 'struct'))

# Build output is sent to stderr so that stdout carries only the report.
subprocess.call(["make", "target/include"], stdout=sys.stderr)

# Fetch the typedef stubs that parse_c() force-includes, if not present yet.
if not os.path.exists("_fake_typedefs.h"):
    sys.stderr.write(f"Downloading '{FAKE_TYPEDEF_URL}'\n")
    with urlopen(FAKE_TYPEDEF_URL) as typedef_response:
        typedef_data = typedef_response.read()
    with open("_fake_typedefs.h", 'wb') as typedef_file:
        typedef_file.write(typedef_data)

# Collect every file below target/include, as a path relative to it.
paths = []
for path, _dirs, files in os.walk('target/include'):
    path = (path + '/').replace('target/include/', '')
    for i in files:
        paths.append(path + i)

# Parse each relibc header and, when it exists, the musl header of the same
# relative path.
relibc_fns = {}
musl_fns = {}
for i in paths:
    sys.stderr.write(f"Parsing '{i}'\n")
    relibc_fns.update(parse_c(['include', 'target/include'], 'target/include/' + i))
    if os.path.exists('musl_include/' + i):
        musl_fns.update(parse_c(['musl_include'], 'musl_include/' + i))

print(f"Found {len(relibc_fns)} Relibc functions, {len(musl_fns)} Musl functions.")
def equivalent_type(a, b):
    """Return True when two rendered type strings denote the same type.

    Before comparing, both strings have ``restrict`` removed, ``unsigned int``
    shortened to ``unsigned`` and, when ``args.ignore_const`` is set,
    ``const`` removed. Whitespace is then collapsed, so a qualifier removed
    from the middle of a type (``char * const *``) does not leave a double
    space behind that would defeat the comparison.
    """
    def normalize(type_str):
        type_str = type_str.replace('restrict', '')
        type_str = type_str.replace('unsigned int', 'unsigned')
        if args.ignore_const:
            type_str = type_str.replace('const', '')
        # Collapse runs of whitespace and trim both ends.
        return ' '.join(type_str.split())

    return normalize(a) == normalize(b)
def equivalent_func(a, b):
    """Return True when two ``(return_type, parameter_types)`` tuples match.

    The return types must be equivalent, the parameter lists must have the
    same length, and every pair of corresponding parameters must be
    equivalent according to ``equivalent_type``.
    """
    ret_a, params_a = a[0], a[1]
    ret_b, params_b = b[0], b[1]
    if not equivalent_type(ret_a, ret_b):
        return False
    if len(params_a) != len(params_b):
        return False
    return all(
        equivalent_type(param_a, param_b)
        for param_a, param_b in zip(params_a, params_b)
    )
# Sort every relibc function into "missing from musl" or "signature differs".
diffs = []
no_musl = []
for fn, relibc_def in relibc_fns.items():
    musl_def = musl_fns.get(fn)
    if musl_def is None:
        no_musl.append(fn)
    elif not equivalent_func(relibc_def, musl_def):
        diffs.append((fn, relibc_def, musl_def))

print(f"{len(no_musl)} functions not in Musl, {len(diffs)} functions differ, {len(relibc_fns)-len(diffs)-len(no_musl)} matches.")
print()

for fn in no_musl:
    print(f"No musl definition for {fn}")
print()

# Column widths for the table. default=0 keeps max() from raising ValueError
# when no function differs.
fn_len = max((len(entry[0]) for entry in diffs), default=0)
fn_len = max(fn_len, len("Function"))
differing_len = max(map(len, ('Differing', 'Return', 'Arg Count', 'Arg 255')))

# State shared by print_fn() and print_diff():
#   fn_print - padded function name still waiting to be printed, or None
#   sep      - separator line to print before the next row, or None
fn_print = None
sep = None
def print_fn(fn):
    """Start the table section for function ``fn``.

    If a separator is pending, a heavy horizontal rule spanning the three
    columns is printed in its place and the pending separator is cleared.
    The name, padded to the width of the first column, is stored in the
    global ``fn_print`` rather than printed here.
    """
    global fn_print, sep
    if sep:
        column_widths = (fn_len, differing_len, 78 - fn_len - differing_len)
        print(*('━' * width for width in column_widths), sep='╋')
        sep = None
    fn_print = fn.ljust(fn_len)
def print_diff(differing, relibc, musl):
    """Print one two-line table row describing a single difference.

    ``differing`` labels what differs; ``relibc`` is shown on the first line
    and ``musl`` on the second. Both values are converted with ``str()``, so
    non-string values such as argument counts are accepted.

    A pending separator is printed first. The pending function name (global
    ``fn_print``) fills the first column and is then cleared; when none is
    pending the column is left blank. A light separator is queued in the
    global ``sep`` for whatever is printed next.
    """
    global fn_print, sep
    if sep:
        print(sep)
        sep = None
    print(fn_print or (' ' * fn_len), end='┃')
    fn_print = None
    print(differing.ljust(differing_len), end='┃')
    print(relibc)
    # str() is required here: concatenating an int to the row raises TypeError.
    print(' ' * fn_len + '┃' + ' ' * differing_len + '┃' + str(musl))
    sep = ' ' * fn_len + '┠' + '─' * differing_len + '╂' + '─' * (78 - fn_len - differing_len)
# Table header followed by a heavy rule.
print("Function".ljust(fn_len), end='┃')
print("Differing".ljust(differing_len), end='┃')
print("Type (Relibc vs Musl)")
print('━'*fn_len,
      '━'*differing_len,
      '━'*(78 - fn_len - differing_len), sep='╋')

# One section per differing function, one row per differing aspect.
for fn, relibc_def, musl_def in diffs:
    print_fn(fn)
    relibc_ret, relibc_params = relibc_def[0], relibc_def[1]
    musl_ret, musl_params = musl_def[0], musl_def[1]
    if not equivalent_type(relibc_ret, musl_ret):
        print_diff('Return', relibc_ret, musl_ret)
    if len(musl_params) != len(relibc_params):
        # Pass the counts as strings: print_diff() concatenates its arguments.
        print_diff('Arg count', str(len(relibc_params)), str(len(musl_params)))
    # Only the parameters present on both sides are compared pairwise.
    for x, (relibc_arg, musl_arg) in enumerate(zip(relibc_params, musl_params)):
        if not equivalent_type(relibc_arg, musl_arg):
            print_diff(f'Arg {x}', relibc_arg, musl_arg)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment