Last active
November 27, 2019 18:57
-
-
Save ezfe/05ff86cb42ecdffcb9cc22f47664d4f7 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Foundation | |
import Ink | |
/// Normalizes an HTML fragment by piping it through the `tidy` command-line tool.
///
/// The fragment is written to the tool's standard input, and whatever the tool
/// prints on standard output is returned. The tool is asked for body-only
/// output with info, warning and error reporting switched off.
///
/// - Parameters:
///   - inputStr: The HTML to normalize.
///   - executablePath: Filesystem path of the `tidy` binary. Defaults to the
///     Homebrew Cellar location of tidy-html5 5.6.0.
/// - Returns: The tool's standard output decoded as UTF-8 (invalid bytes are
///   replaced rather than causing a failure). An empty string is returned when
///   the tool cannot be started or its input cannot be closed; callers should
///   treat an empty result as "no usable output".
func tidy(
    _ inputStr: String,
    executablePath: String = "/usr/local/Cellar/tidy-html5/5.6.0/bin/tidy"
) -> String {
    let task = Process()
    task.executableURL = URL(fileURLWithPath: executablePath)
    task.arguments = [
        "--show-body-only", "yes",
        "--show-info", "no",
        "--show-warnings", "no",
        "--show-errors", "0",
    ]

    let input = Pipe()
    let output = Pipe()
    task.standardInput = input
    task.standardOutput = output
    // Nothing consumes the tool's diagnostics. Discard them instead of parking
    // them in a pipe nobody drains, which could block the child once it fills.
    task.standardError = FileHandle.nullDevice

    do {
        try task.run()
    } catch {
        let message = "tidy: unable to run \(executablePath): \(error)\n"
        FileHandle.standardError.write(Data(message.utf8))
        return ""
    }

    // NOTE: the whole input is written before any output is read. That is fine
    // for small fragments; very large inputs would need concurrent reading.
    let writer = input.fileHandleForWriting
    writer.write(Data(inputStr.utf8))
    do {
        // Closing stdin is what signals end-of-input to the child.
        try writer.close()
    } catch {
        // Without EOF the child would never finish, so stop it and give up.
        task.terminate()
        task.waitUntilExit()
        return ""
    }

    let outputData = output.fileHandleForReading.readDataToEndOfFile()
    // Reap the child so repeated calls do not accumulate finished processes.
    task.waitUntilExit()

    return String(decoding: outputData, as: UTF8.self)
}
/// One example decoded from the CommonMark spec JSON.
///
/// Only the two keys used by this script are decoded; any other keys in the
/// JSON objects are ignored by the synthesized `Decodable` conformance.
struct TestCase: Decodable {
    /// The Markdown source fed to the parser.
    let markdown: String
    /// The HTML the spec expects for `markdown`.
    let html: String
}
/// Downloads and decodes the spec examples found at `url`.
///
/// On any download or decoding failure a diagnostic is written to standard
/// error and the process exits with status 1, since nothing can be tested
/// without the examples.
private func loadTestCases(from url: URL) -> [TestCase] {
    do {
        let data = try Data(contentsOf: url)
        return try JSONDecoder().decode([TestCase].self, from: data)
    } catch {
        let message = "Failed to load test cases from \(url): \(error)\n"
        FileHandle.standardError.write(Data(message.utf8))
        exit(1)
    }
}

// The URL is a compile-time literal, so a nil here is a programmer error.
let testURL = URL(string: "https://spec.commonmark.org/0.29/spec.json")!
let testCases = loadTestCases(from: testURL)
let mdParser = MarkdownParser()

var pass = 0
for testCase in testCases {
    let gotHTML = mdParser.html(from: testCase.markdown)

    // Fast path: the rendered HTML matches the expected HTML exactly.
    if gotHTML == testCase.html {
        pass += 1
        continue
    }

    // Otherwise compare after normalizing both sides with tidy. An empty
    // normalized result is never accepted as a match, so two blank outputs
    // cannot count as a pass.
    let gotTidied = tidy(gotHTML)
    if !gotTidied.isEmpty && gotTidied == tidy(testCase.html) {
        pass += 1
        continue
    }

    // Report the failing example: input, expected HTML, then actual HTML,
    // with newlines made visible.
    print("Input:\n\(testCase.markdown)")
    print("----------------------------------------------------------------")
    print(testCase.html.replacingOccurrences(of: "\n", with: "\\n"))
    print("----------------------------------------------------------------")
    print(gotHTML.replacingOccurrences(of: "\n", with: "\\n"))
    print("================================================================")
}

print("\(pass)/\(testCases.count) passed")
@john-mueller ah yes, HTML parsing errors would def. be an issue – this can definitely be improved. When blank tidy() output is treated as failing, the number of passing tests decreases to 193.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks for working on this! I just compared this to the output of the official tests on the CommonMark repo, and there are a few discrepancies. First, the repo has been updated with an additional test, meaning the test numbers are off by one for most of the file. After downloading the spec.txt that matches the json you're using, from the website, I get 189 out of 649 passing. It seems the problem is that if tidy hits an error, it outputs a blank string, leading
tidy(gotHTML) == tidy(testCase.html)
to return true, even though the HTML isn't actually being compared. The Python tests are using their own normalizing function. Look at test 609 as an example of the problem. Here are my (bash) commands for running the Python tests:
Note that this relies on a not-yet-merged modification to Ink to read stdin