Created
September 22, 2024 16:58
-
-
Save TheMatt2/20e164c8daac8ecd6460cb7f0ac28e6d to your computer and use it in GitHub Desktop.
Quick Python script that searches UNIX man pages for which errno codes can be returned by which system functions, and generates a CSV file with the results.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import os | |
import csv | |
import sys | |
import textwrap | |
import subprocess | |
from io import StringIO | |
from colorama.ansitowin32 import AnsiToWin32 as AnsiToText | |
class AnsiToText:
    """Write-only wrapper that reduces formatted text to plain ASCII.

    Text handed to :meth:`write` is cleaned in three steps before being
    forwarded to the wrapped stream:

    1. every backspace character (0x08) deletes the character before it;
    2. a fixed set of typographic Unicode characters is replaced by ASCII
       look-alikes;
    3. the result is validated: apart from newline and no-break space
       (0xA0), which pass through unchanged, every character must be
       printable ASCII.
    """

    # One-pass translation table for the Unicode -> ASCII substitutions.
    _ASCII_REPLACEMENTS = str.maketrans({
        '\u2013': '-',
        '\u2018': "'",
        '\u2019': "'",
        '\u201c': '"',
        '\u201d': '"',
        '\u2022': '*',
        '\u2192': '->',
        '\u2500': '-',
        '\u2501': '-',
        '\u2502': '|',
        '\u252c': '|',
        '\u252f': '|',
        '\u253c': '|',
        '\u2534': '|',
        '\u2577': '.',
        '\u27e8': '(',
        '\u27e9': ')',
    })

    def __init__(self, stream):
        """Wrap *stream*, any object exposing ``write(str)``."""
        self.stream = stream

    def write(self, text):
        """Clean *text* and write the result to the wrapped stream.

        A backspace with nothing before it in *text* is dropped; characters
        already written by an earlier call cannot be taken back.

        Raises:
            ValueError: if, after replacement, a character other than
                newline or no-break space is non-printable or non-ASCII.
        """
        # Resolve backspaces with a stack so that a leading backspace or
        # several backspaces in a row are handled correctly.
        kept = []
        for char in text:
            if char != '\x08':
                kept.append(char)
            elif kept:
                kept.pop()

        cleaned = "".join(kept).translate(self._ASCII_REPLACEMENTS)

        # Fail loudly on anything the replacement table does not yet cover,
        # so that new characters get noticed and added.
        for char in cleaned:
            if char in "\n\xa0":
                continue
            if not (char.isprintable() and char.isascii()):
                raise ValueError(f"unsupported character in input: {char!r}")

        self.stream.write(cleaned)
def print_list(items, columns=4, width=80, file = None):
    """Write *items* as a sorted, column-major grid of padded cells.

    Args:
        items: iterable of strings; it is sorted before being laid out.
        columns: number of columns in the grid.
        width: total line width; each cell is ``width // columns`` wide.
        file: object with a ``write`` method; ``sys.stdout`` is used when
            this is falsy.

    Items run down the first column, then the second, and so on. Every
    cell except the one in the last column is followed by padding of at
    least one space.
    """
    out = file if file else sys.stdout
    ordered = sorted(items)
    total = len(ordered)
    cell_width = width // columns
    # Ceiling division: rows needed to fit every item.
    row_count = (total + columns - 1) // columns
    last_col = columns - 1

    for row in range(row_count):
        for col in range(columns):
            index = row + col * row_count
            if index >= total:
                # The final columns can be shorter than the others.
                continue
            entry = ordered[index]
            out.write(entry)
            if col != last_col:
                # Pad to the cell width, but never less than one space.
                out.write(' ' * max(1, cell_width - len(entry)))
        out.write('\n')
def get_error_codes():
    """Parse the ``errno`` man page into a list of known error codes.

    Runs ``man errno``, flattens the output with ``AnsiToText`` and scans
    the text between the "DIAGNOSTICS" and "DEFINITIONS" headings for
    entries of the form ``<number> <ENAME><description>`` that end with a
    blank line. The entry numbered 0 is skipped. Each parsed entry is also
    echoed to stdout with a shortened description.

    Returns:
        A list of ``(errnum, errname, errdesc)`` tuples in page order, where
        ``errnum`` is an int and ``errdesc`` has its whitespace collapsed.

    Raises:
        subprocess.CalledProcessError: if ``man`` exits with an error.
        ValueError: if the page lacks the DIAGNOSTICS/DEFINITIONS headings.
    """
    man_page = subprocess.check_output(["man", "errno"]).decode()

    man_stripped = StringIO()
    AnsiToText(man_stripped).write(man_page)
    man_stripped = man_stripped.getvalue()

    # Pull diagnostics section
    try:
        section_start = man_stripped.index("DIAGNOSTICS")
        section_end = man_stripped.index("DEFINITIONS", section_start)
    except ValueError as err:
        raise ValueError(
            "'man errno' output has no DIAGNOSTICS ... DEFINITIONS section"
        ) from err
    diagnostics = man_stripped[section_start:section_end]

    # Error names may contain digits (e.g. E2BIG), so the name pattern must
    # accept them or those table rows are silently dropped.
    matches = re.finditer(
        r"^\s+(?P<errnum>\d+) (?P<errname>E[a-zA-Z0-9]+)(?P<errdesc>.+?)\n\n",
        diagnostics, re.MULTILINE | re.DOTALL)

    error_codes = []
    for match in matches:
        errnum, errname, errdesc = match.groups()
        errnum = int(errnum)
        if errnum == 0:
            continue

        # Collapse line breaks and indentation into single spaces.
        errdesc = " ".join(errdesc.split())
        error_codes.append((errnum, errname, errdesc))
        print(errnum, errname, textwrap.shorten(errdesc, 40, placeholder = "..."))
    return error_codes
def list_commands():
    """Return the sorted, de-duplicated names of all section 2 man pages.

    Asks ``man`` for the file paths of every page in section 2, reduces
    each path to the part of its basename before the first dot, and drops
    the general "intro" page. The names are printed as a column listing,
    followed by a count.
    """
    # List all man pages
    # https://unix.stackexchange.com/questions/44329/how-do-you-output-a-list-of-all-man-pages-in-a-particular-section
    # Search man page section 2
    listing = subprocess.check_output(["man", "-aWS", "2", "*"]).decode()

    names = set()
    for path in listing.split("\n"):
        stem = os.path.basename(path).split('.', 1)[0]
        # Skip blank lines; "intro" is the introduction man page with
        # general information, not a system function.
        if not stem or stem == "intro":
            continue
        names.add(stem)

    unix_commands = sorted(names)
    print_list(unix_commands)
    print("Found", len(unix_commands), "commands")
    return unix_commands
# Look up a command's man page and its possible error return codes
def command_error_codes(command, global_error_codes):
    """Collect the errno names documented in the section 2 page of *command*.

    Runs ``man 2 <command>``, flattens the output with ``AnsiToText`` and,
    from the first occurrence of "ERRORS" onward, collects entries of the
    form ``[ENAME] description`` that end with a blank line. Each entry is
    echoed to stdout. Afterwards every name in *global_error_codes* that is
    mentioned before or after the parsed entries is reported on stdout with
    a little surrounding context.

    Args:
        command: name of the section 2 man page to read.
        global_error_codes: iterable of 3-tuples whose second element is an
            errno name (the shape returned by ``get_error_codes``).

    Returns:
        A list of ``(errname, errdesc)`` tuples in page order; ``errdesc``
        has its whitespace collapsed.

    Raises:
        subprocess.CalledProcessError: if ``man`` exits with an error.
    """
    man_page = subprocess.check_output(["man", "2", command]).decode()

    man_stripped = StringIO()
    AnsiToText(man_stripped).write(man_page)
    man_stripped = man_stripped.getvalue()

    # Pull error section
    errors_index = man_stripped.find("ERRORS")
    if errors_index != -1:
        errors = man_stripped[errors_index:]
    else:
        errors = ""
        errors_index = 0

    # Find error names. They may contain digits (e.g. E2BIG).
    matches = re.finditer(
        r"^\s+\[(?P<errname>E[a-zA-Z0-9]+)\](?P<errdesc>.+?)\n\n",
        errors, re.MULTILINE | re.DOTALL)

    error_codes = []
    # Extent of the parsed entries, as offsets into ``errors``.
    start = len(man_stripped)
    end = 0
    for match in matches:
        start = min(start, match.start())
        end = max(end, match.end())

        errname, errdesc = match.groups()
        errdesc = " ".join(errdesc.split())
        error_codes.append((errname, errdesc))
        print(command, errname, textwrap.shorten(errdesc, 40, placeholder = "..."))

    # Convert the extent to absolute offsets into ``man_stripped``. With no
    # matches this leaves the whole page as the "before" region.
    scanned_start = min(errors_index + start, len(man_stripped))
    scanned_end = errors_index + end

    # Check if errno mentioned outside scanned region
    for _, errname, _ in global_error_codes:
        # str.find with bounds returns an absolute index, so the context
        # slice below is right for hits both before and after the entries.
        errname_index = man_stripped.find(errname, 0, scanned_start)
        if errname_index == -1:
            errname_index = man_stripped.find(errname, scanned_end)

        if errname_index != -1:
            # Clamp at 0: a negative slice start would wrap to the end.
            desc = man_stripped[max(0, errname_index - 20): errname_index + 20]
            desc = " ".join(desc.split())
            print("Error name found outside error section in", command,
                  ": ...", desc, "...")
    return error_codes
def main():
    """Build ``all_errnos.csv``: one row per system function, one column per errno.

    Gathers the known error codes and the list of section 2 commands, reads
    each command's documented errors, and writes a CSV file to the current
    directory. The first three rows hold the errno names, numbers and
    general descriptions. Each following row holds one command and, under
    each errno column, that command's description(s) for the error (empty
    when the command does not document it). Several descriptions for the
    same error are numbered and joined with newlines.

    Only errno names returned by ``get_error_codes`` get a column; other
    names found on a command's page are not written.
    """
    error_codes = get_error_codes()
    error_names = [errname for _, errname, _ in error_codes]
    error_codes_map = {
        errname: (errnum, errdesc) for errnum, errname, errdesc in error_codes
    }

    commands = list_commands()
    command_map = {
        command: command_error_codes(command, error_codes)
        for command in commands
    }

    print("Found", len(commands), "commands")
    print("Found", len(error_codes), "error codes")

    # Write csv file data.
    # newline="" lets the csv module control line endings itself; cells
    # here can contain embedded newlines, which text-mode newline
    # translation would otherwise alter.
    with open("all_errnos.csv", "w", newline="") as f:
        writer = csv.writer(f)

        # Write headers
        writer.writerow(["System Function"] + error_names)
        writer.writerow(["Error Number"] + [str(error_codes_map[errname][0]) for errname in error_names])
        writer.writerow(["Error Description"] + [error_codes_map[errname][1] for errname in error_names])

        for command, command_errors in command_map.items():
            # Group this command's descriptions by errno name.
            descriptions_by_name = {}
            for errname, errdesc in command_errors:
                descriptions_by_name.setdefault(errname, []).append(errdesc)

            row = [command]
            for errname in error_names:
                descriptions = descriptions_by_name.get(errname, [])
                if len(descriptions) == 1:
                    combined_descr = descriptions[0]
                else:
                    # Zero descriptions gives ""; several give a numbered list.
                    combined_descr = "\n".join(
                        f"{number}. {descr}"
                        for number, descr in enumerate(descriptions, 1)
                    ).strip()
                row.append(combined_descr)
            writer.writerow(row)
# Run the scan only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment