Skip to content

Instantly share code, notes, and snippets.

@ids1024
Last active November 26, 2018 05:28
Show Gist options
  • Save ids1024/805491118a5cad3e77bc1b83e9af909b to your computer and use it in GitHub Desktop.
Save ids1024/805491118a5cad3e77bc1b83e9af909b to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import os
import sys
import subprocess
import argparse
from html.parser import HTMLParser
from urllib.request import urlopen
from pycparser import c_ast, parse_file
# Base URL of musl's cgit web interface; cgit_download() appends
# '/plain/<path>' to it to fetch raw files and directory listings.
CGIT_URL = "https://git.musl-libc.org/cgit/musl"
# pycparser's stub typedef header; it is saved locally as '_fake_typedefs.h'
# and force-included by parse_c() when preprocessing each header.
FAKE_TYPEDEF_URL = "https://raw.githubusercontent.com/eliben/pycparser/master/utils/fake_libc_include/_fake_typedefs.h"

# Command-line options controlling how strictly types are rendered/compared.
parser = argparse.ArgumentParser(
    description="Compare function prototypes in relibc's headers with musl's.")
parser.add_argument('--ignore-const', action='store_true',
                    help="ignore 'const' qualifiers when comparing types")
parser.add_argument('--array-as-ptr', action='store_true',
                    help="render sized array types as pointers")
parser.add_argument('--drop-struct', action='store_true',
                    help="omit the 'struct' keyword when rendering struct types")
args = parser.parse_args()
class UrlExtractor(HTMLParser):
    """Collect the text content of every ``<a>`` element fed to the parser.

    After ``feed()``, ``urls`` holds one entry per chunk of character data
    that was seen while an ``<a>`` tag was open (the link *text*, not the
    ``href`` attribute).
    """

    # True while the parser is between an <a> start tag and its end tag.
    a = False

    def __init__(self):
        super().__init__()
        self.urls = []

    def handle_starttag(self, tag, _attrs):
        """Mark that an anchor has been opened."""
        if tag != 'a':
            return
        self.a = True

    def handle_endtag(self, tag):
        """Mark that the current anchor has been closed."""
        if tag != 'a':
            return
        self.a = False

    def handle_data(self, data):
        """Record character data only while inside an anchor."""
        if not self.a:
            return
        self.urls.append(data)
def cgit_download(cgit_url, path, local_path):
    """Recursively mirror a file or directory from a cgit repository.

    Fetches ``<cgit_url>/plain/<path>``.  An HTML response is treated as a
    directory listing: ``local_path`` is created and every link text found in
    the page (except ``'../'``) is downloaded recursively beneath it.  Any
    other response is written verbatim to ``local_path``.

    Args:
        cgit_url: Base URL of the cgit repository.
        path: Path inside the repository to fetch.
        local_path: Destination file or directory on disk.

    Progress messages are written to standard error.
    """
    url = cgit_url + '/plain/' + path
    with urlopen(url) as response:
        # A response without a Content-Type header is treated as a plain file
        # instead of crashing on `'text/html' in None`.
        content_type = response.info().get('Content-Type') or ''
        body = response.read()

    if 'text/html' not in content_type:
        sys.stderr.write(f"Downloading file '{local_path}'\n")
        with open(local_path, 'wb') as local_file:
            local_file.write(body)
        return

    extractor = UrlExtractor()
    extractor.feed(body.decode())
    sys.stderr.write(f"Creating directory '{local_path}'\n")
    if not os.path.exists(local_path):
        os.mkdir(local_path)
    for entry in extractor.urls:
        # Skip the parent-directory link so the walk only descends.
        if entry != '../':
            cgit_download(cgit_url, path + '/' + entry, local_path + '/' + entry)
def parseType(node):
    """Render a pycparser type node as a normalised C type string.

    Handles ``TypeDecl`` (identifier and struct types), ``PtrDecl`` (including
    pointers to functions), ``Typename``, ``ArrayDecl``, ``Decl`` and
    ``EllipsisParam`` nodes.  Qualifiers stored on the node (``node.quals``)
    are included in the output.

    The module-level ``args`` options affect the result: ``--drop-struct``
    omits the ``struct`` keyword and ``--array-as-ptr`` renders sized arrays
    as pointers.

    Returns:
        The type as a string.  A ``TypeDecl`` wrapping anything other than an
        identifier or struct type yields ``''``.

    Exits the program (``sys.exit``) on node classes that are not handled and
    on array dimensions that are not constants.
    """
    if isinstance(node, c_ast.TypeDecl):
        inner = node.type
        if isinstance(inner, c_ast.IdentifierType):
            return ' '.join(node.quals + inner.names)
        if isinstance(inner, c_ast.Struct):
            if args.drop_struct:
                return ' '.join(node.quals + [f'{inner.name}'])
            return ' '.join(node.quals + [f'struct {inner.name}'])
        # XXX: other wrapped type kinds are not rendered yet.
        return ''

    if isinstance(node, c_ast.PtrDecl):
        ptr = '*'
        if node.quals:
            ptr += ' ' + ' '.join(node.quals)
        if isinstance(node.type, c_ast.FuncDecl):
            func = node.type
            # pycparser leaves FuncDecl.args as None for an empty parameter
            # list `()`, so guard before reading .params.
            params = func.args.params if func.args is not None else []
            return ('(' + parseType(func.type) + ')(' + ptr + ')('
                    + ', '.join(map(parseType, params)) + ')')
        return parseType(node.type) + ' ' + ptr

    if isinstance(node, c_ast.Typename):
        return parseType(node.type)

    if isinstance(node, c_ast.ArrayDecl):
        if node.dim is None or args.array_as_ptr:
            # Array is equivalent to a pointer
            return parseType(node.type) + ' *'
        if isinstance(node.dim, c_ast.Constant):
            return parseType(node.type) + f" [{node.dim.value}]"
        sys.exit(f"Class '{type(node.dim)}' not handled.")

    if isinstance(node, c_ast.Decl):
        return ' '.join(node.quals + [parseType(node.type)])

    if isinstance(node, c_ast.EllipsisParam):
        return '...'

    sys.exit(f"Class '{type(node)}' not handled.")
def get_name(node):
    """Return the declared name at the bottom of a declarator chain.

    Unwraps any ``FuncDecl`` / ``PtrDecl`` layers by following ``.type`` and
    returns the ``declname`` of the first node that is neither.
    """
    current = node
    while isinstance(current, (c_ast.FuncDecl, c_ast.PtrDecl)):
        current = current.type
    return current.declname
def parse_c(include_dirs, path):
    """Parse a C header and collect the prototypes of its top-level functions.

    The file is run through the C preprocessor (via pycparser's
    ``parse_file``) with the standard include path disabled, ``__GNUC__``
    undefined, ``_Noreturn`` defined to nothing, and ``_fake_typedefs.h``
    force-included.

    Args:
        include_dirs: Directories passed to the preprocessor with ``-isystem``.
        path: Path of the header to parse.

    Returns:
        A dict mapping each function name to a tuple
        ``(return_type, [param_type, ...])`` of strings produced by
        ``parseType``.
    """
    # -undef?
    cpp_args = [
        '-nostdinc',
        '-U__GNUC__',
        '-include', '_fake_typedefs.h',
        '-D_Noreturn='
    ]
    for directory in include_dirs:
        cpp_args += ['-isystem', directory]

    ast = parse_file(path, use_cpp=True, cpp_args=cpp_args)

    fns = {}
    for _, child in ast.children():
        # Only top-level function declarations are of interest.
        if not isinstance(child, c_ast.Decl):
            continue
        func = child.type
        if not isinstance(func, c_ast.FuncDecl):
            continue
        # pycparser leaves FuncDecl.args as None for `name()` prototypes;
        # treat that as an empty parameter list rather than crashing.
        params = func.args.params if func.args is not None else []
        name = get_name(func)
        fns[name] = (parseType(func.type), [parseType(p) for p in params])
    return fns
# Download musl's headers once; the generic and x86_64 arch directories are
# overlaid onto the same local tree.
if not os.path.exists("musl_include"):
    cgit_download(CGIT_URL, "include", "musl_include")
    cgit_download(CGIT_URL, "arch/generic", "musl_include")
    cgit_download(CGIT_URL, "arch/x86_64", "musl_include")
    # Build bits/alltypes.h from the two downloaded .in templates by
    # expanding their TYPEDEF / STRUCT placeholders into C keywords.
    with open('musl_include/bits/alltypes.h', 'w') as outfile:
        with open('musl_include/bits/alltypes.h.in') as infile:
            outfile.write(infile.read().replace('TYPEDEF', 'typedef'))
        outfile.write('\n')
        with open('musl_include/alltypes.h.in') as infile:
            outfile.write(infile.read().replace('TYPEDEF', 'typedef')
                          .replace('STRUCT', 'struct'))

# Run the `target/include` make target; its stdout is redirected to stderr so
# this script's own stdout only carries the report.
subprocess.call(["make", "target/include"], stdout=sys.stderr)

if not os.path.exists("_fake_typedefs.h"):
    sys.stderr.write(f"Downloading '{FAKE_TYPEDEF_URL}'\n")
    with open("_fake_typedefs.h", 'wb') as typedef_file:
        typedef_file.write(urlopen(FAKE_TYPEDEF_URL).read())

# Collect every file under target/include as a path relative to that root.
paths = []
for path, _dirs, files in os.walk('target/include'):
    path = (path + '/').replace('target/include/', '')
    paths.extend(path + name for name in files)

# Parse each header from target/include and, when musl has a file at the
# same relative path, the musl counterpart too.
relibc_fns = {}
musl_fns = {}
for i in paths:
    sys.stderr.write(f"Parsing '{i}'\n")
    relibc_fns.update(parse_c(['include', 'target/include'], 'target/include/' + i))
    if os.path.exists('musl_include/' + i):
        musl_fns.update(parse_c(['musl_include'], 'musl_include/' + i))
print(f"Found {len(relibc_fns)} Relibc functions, {len(musl_fns)} Musl functions.")
def equivalent_type(a, b):
    """Return True when two rendered C type strings denote the same type.

    Both strings are normalised the same way before comparison:

    * ``restrict`` qualifiers are dropped,
    * ``const`` qualifiers are dropped when ``--ignore-const`` was given,
    * ``unsigned int`` is spelled ``unsigned``,
    * whitespace is collapsed, and removed next to ``(``, ``)`` and ``,``.

    Args:
        a: First type string (as produced by ``parseType``).
        b: Second type string.
    """
    # Function-scope import: `re` is not part of the file's import header.
    import re

    def normalize(type_str):
        # Match whole words only, so identifiers that merely contain
        # 'const' or 'restrict' are left intact.
        type_str = re.sub(r'\brestrict\b', ' ', type_str)
        if args.ignore_const:
            type_str = re.sub(r'\bconst\b', ' ', type_str)
        type_str = re.sub(r'\bunsigned\s+int\b', 'unsigned', type_str)
        # Removing a qualifier can leave stray spaces (e.g. "char *  *" or
        # "( char *)"); canonicalise spacing so they do not cause mismatches.
        type_str = re.sub(r'\s*([(),])\s*', r'\1', type_str)
        return ' '.join(type_str.split())

    return normalize(a) == normalize(b)
def equivalent_func(a, b):
    """Return True when two prototypes have equivalent signatures.

    Each prototype is indexed as ``[0]`` for the return type string and
    ``[1]`` for the list of parameter type strings.  The prototypes match
    when the return types are equivalent, the parameter counts are equal and
    every pair of corresponding parameters is equivalent according to
    ``equivalent_type``.
    """
    ret_a, ret_b = a[0], b[0]
    params_a, params_b = a[1], b[1]
    if not equivalent_type(ret_a, ret_b):
        return False
    if len(params_a) != len(params_b):
        return False
    return all(equivalent_type(pa, pb) for pa, pb in zip(params_a, params_b))
# Split the relibc functions into those musl does not declare and those whose
# prototypes differ from musl's.
diffs = []
no_musl = []
for fn, relibc_def in relibc_fns.items():
    musl_def = musl_fns.get(fn)
    if musl_def is None:
        no_musl.append(fn)
    elif not equivalent_func(relibc_def, musl_def):
        diffs.append((fn, relibc_def, musl_def))
print(f"{len(no_musl)} functions not in Musl, {len(diffs)} functions differ, {len(relibc_fns)-len(diffs)-len(no_musl)} matches.")
print()
for fn in no_musl:
    print(f"No musl definition for {fn}")
print()

# Column widths for the table printed below.  `default=0` keeps max() from
# raising ValueError when no function differs.
fn_len = max((len(i[0]) for i in diffs), default=0)
fn_len = max(fn_len, len("Function"))
differing_len = max(map(len, ('Differing', 'Return', 'Arg Count', 'Arg 255')))

# Shared state for print_fn / print_diff: the function-name cell still to be
# printed, and the separator line pending before the next row.
fn_print = None
sep = None
def print_fn(fn):
    """Start a new function group in the table.

    If a row separator is pending, a heavy horizontal rule is printed in its
    place and the pending separator is cleared.  The padded function name is
    then stored in the global ``fn_print`` for the next ``print_diff`` call
    to emit.
    """
    global fn_print, sep
    if sep:
        rule_cells = [
            '━' * fn_len,
            '━' * differing_len,
            '━' * (78 - fn_len - differing_len),
        ]
        print('╋'.join(rule_cells))
        sep = None
    fn_print = fn.ljust(fn_len)
def print_diff(differing, relibc, musl):
    """Print one two-line table row describing a single difference.

    The first line shows the pending function name (or blanks), the
    ``differing`` label and the relibc value; the second line shows the musl
    value beneath it.  A light separator is queued in the global ``sep`` and
    printed before the next row, unless ``print_fn`` replaces it first.

    Args:
        differing: Label for what differs (e.g. ``'Return'``, ``'Arg 0'``).
        relibc: Value on the relibc side; converted with ``str()``.
        musl: Value on the musl side; converted with ``str()``.
    """
    global fn_print, sep
    if sep:
        print(sep)
        sep = None
    print(fn_print or (' '*fn_len), end='┃')
    fn_print = None
    print(differing + ' '*(differing_len-len(differing)), end='┃')
    print(str(relibc))
    # str() is required here: the 'Arg count' row passes ints, and
    # concatenating an int to a str raises TypeError.
    print(' '*fn_len + '┃' + ' '*differing_len + '┃' + str(musl))
    sep = ' '*fn_len + '┠' + '─'*differing_len + '╂' + '─'*(78 - fn_len - differing_len)
# Table header followed by a heavy rule.
print("Function" + ' '*(fn_len-8), end='┃')
print("Differing" + ' '*(differing_len-9), end='┃')
print("Type (Relibc vs Musl)")
print('━'*fn_len,
      '━'*differing_len,
      '━'*(78 - fn_len - differing_len), sep='╋')

# One group per differing function: return type, argument count, then each
# argument position present on both sides.
for fn, relibc_def, musl_def in diffs:
    print_fn(fn)
    relibc_params = relibc_def[1]
    musl_params = musl_def[1]
    if not equivalent_type(relibc_def[0], musl_def[0]):
        print_diff('Return', relibc_def[0], musl_def[0])
    if len(musl_params) != len(relibc_params):
        # Pass the counts as strings: print_diff concatenates its musl
        # argument to a str, which fails for ints.
        print_diff('Arg count', str(len(relibc_params)), str(len(musl_params)))
    for x, (relibc_arg, musl_arg) in enumerate(zip(relibc_params, musl_params)):
        if not equivalent_type(relibc_arg, musl_arg):
            print_diff(f'Arg {x}', relibc_arg, musl_arg)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment