Last active
May 7, 2025 05:13
-
-
Save sogaiu/31b7ddcc1eab945d27ddb7e08aa28654 to your computer and use it in GitHub Desktop.
experimenting with doc-parse extracted (and slightly modified) from doc-format
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# for comparing with version being explored below | |
(defn doc-parse-original | |
`Extracted from doc-format` | |
[str &opt colorize] | |
(def has-color (if (not= nil colorize) | |
colorize | |
(dyn *doc-color*))) | |
# Terminal codes for emission/tokenization | |
(def delimiters | |
(if has-color | |
{:underline ["\e[4m" "\e[24m"] | |
:code ["\e[97m" "\e[39m"] | |
:italics ["\e[4m" "\e[24m"] | |
:bold ["\e[1m" "\e[22m"]} | |
{:underline ["_" "_"] | |
:code ["`" "`"] | |
:italics ["*" "*"] | |
:bold ["**" "**"]})) | |
(def modes @{}) | |
(defn toggle-mode [mode] | |
(def active (get modes mode)) | |
(def delims (get delimiters mode)) | |
(put modes mode (not active)) | |
(delims (if active 1 0))) | |
# Parse state | |
(var cursor 0) # indexes into string for parsing | |
(var stack @[]) # return value for this block. | |
# Traversal helpers | |
(defn c [] (get str cursor)) | |
(defn cn [n] (get str (+ n cursor))) | |
(defn c++ [] (let [ret (get str cursor)] (++ cursor) ret)) | |
(defn c+=n [n] (let [ret (get str cursor)] (+= cursor n) ret)) | |
# skip* functions return number of characters matched and advance the cursor. | |
(defn skipwhite [] | |
(def x cursor) | |
(while (= (c) (chr " ")) (++ cursor)) | |
(- cursor x)) | |
(defn skipline [] | |
(def x cursor) | |
(while (let [y (c)] (and y (not= y (chr "\n")))) (++ cursor)) | |
(c++) | |
(- cursor x)) | |
# Detection helpers - return number of characters matched | |
(defn ul? [] | |
(let [x (c) x1 (cn 1)] | |
(and | |
(= x1 (chr " ")) | |
(or (= x (chr "*")) (= x (chr "-"))) | |
2))) | |
(defn ol? [] | |
(def old cursor) | |
(while (and (>= (c) (chr "0")) (<= (c) (chr "9"))) (c++)) | |
(let [c1 (c) c2 (cn 1) c* cursor] | |
(set cursor old) | |
(if (and (= c1 (chr ".")) (= c2 (chr " "))) | |
(- c* cursor -2)))) | |
(defn fcb? [] (if (= (chr "`") (c) (cn 1) (cn 2)) 3)) | |
(defn nl? [] (= (chr "\n") (c))) | |
# Parse helper | |
# parse-* functions push nodes to `stack`, and return | |
# the indentation they leave the cursor on. | |
(var parse-blocks nil) # mutual recursion | |
(defn getslice [from to] | |
(def to (min to (length str))) | |
(string/slice str from to)) | |
(defn push [x] (array/push stack x)) | |
(defn parse-list [bullet-check initial indent] | |
(def temp-stack @[initial]) | |
(def old-stack stack) | |
(set stack temp-stack) | |
(var current-indent indent) | |
(while (and (c) (>= current-indent indent)) | |
(def item-indent | |
(when-let [x (bullet-check)] | |
(c+=n x) | |
(+ indent (skipwhite) x))) | |
(unless item-indent | |
(set current-indent (skipwhite)) | |
(break)) | |
(def item-stack @[]) | |
(set stack item-stack) | |
(set current-indent (parse-blocks item-indent)) | |
(set stack temp-stack) | |
(push item-stack)) | |
(set stack old-stack) | |
(push temp-stack) | |
current-indent) | |
(defn add-codeblock [indent start end] | |
(def replace-chunk (string "\n" (string/repeat " " indent))) | |
(push @[:cb (string/replace-all replace-chunk "\n" (getslice start end))]) | |
(skipline) | |
(skipwhite)) | |
(defn parse-fcb [indent] | |
(c+=n 3) | |
(skipline) | |
(c+=n indent) | |
(def start cursor) | |
(var end cursor) | |
(while (c) | |
(if (fcb?) (break)) | |
(skipline) | |
(set end cursor) | |
(skipwhite)) | |
(add-codeblock indent start end)) | |
(defn parse-icb [indent] | |
(var current-indent indent) | |
(def start cursor) | |
(var end cursor) | |
(while (c) | |
(skipline) | |
(set end cursor) | |
(set current-indent (skipwhite)) | |
(if (< current-indent indent) (break))) | |
(add-codeblock indent start end)) | |
(defn tokenize-line [line] | |
(def tokens @[]) | |
(def token @"") | |
(var token-length 0) | |
(defn delim [mode] | |
(def d (toggle-mode mode)) | |
(if-not has-color (+= token-length (length d))) | |
(buffer/push token d)) | |
(defn endtoken [] | |
(if (first token) (array/push tokens [(string token) token-length])) | |
(buffer/clear token) | |
(set token-length 0)) | |
(forv i 0 (length line) | |
(def b (get line i)) | |
(cond | |
(or (= b (chr "\n")) (= b (chr " "))) (endtoken) | |
(= b (chr "`")) (delim :code) | |
(not (modes :code)) | |
(cond | |
(= b (chr `\`)) (do | |
(++ token-length) | |
(buffer/push token (get line (++ i)))) | |
(= b (chr "_")) (delim :underline) | |
(= b (chr "*")) | |
(if (= (chr "*") (get line (+ i 1))) | |
(do (++ i) | |
(delim :bold)) | |
(delim :italics)) | |
(do (++ token-length) (buffer/push token b))) | |
(do (++ token-length) (buffer/push token b)))) | |
(endtoken) | |
(tuple/slice tokens)) | |
(set | |
parse-blocks | |
(fn parse-blocks [indent] | |
(var new-indent indent) | |
(var p-start nil) | |
(var p-end nil) | |
(defn p-line [] | |
(unless p-start | |
(set p-start cursor)) | |
(skipline) | |
(set p-end cursor) | |
(set new-indent (skipwhite))) | |
(defn finish-p [] | |
(when (and p-start (> p-end p-start)) | |
(push (tokenize-line (getslice p-start p-end))) | |
(set p-start nil))) | |
(while (and (c) (>= new-indent indent)) | |
(cond | |
(nl?) (do (finish-p) (c++) (set new-indent (skipwhite))) | |
(ul?) (do (finish-p) (set new-indent (parse-list ul? :ul new-indent))) | |
(ol?) (do (finish-p) (set new-indent (parse-list ol? :ol new-indent))) | |
(fcb?) (do (finish-p) (set new-indent (parse-fcb new-indent))) | |
(>= new-indent (+ 4 indent)) (do (finish-p) (set new-indent (parse-icb new-indent))) | |
(p-line))) | |
(finish-p) | |
new-indent)) | |
# Handle first line specially for defn, defmacro, etc. | |
(when (= (chr "(") (in str 0)) | |
(skipline) | |
(def first-line (string/slice str 0 (- cursor 1))) | |
(def fl-open (if has-color "\e[97m" "")) | |
(def fl-close (if has-color "\e[39m" "")) | |
(push [[(string fl-open first-line fl-close) (length first-line)]])) | |
(parse-blocks 0) | |
stack) | |
(comment | |
(def case-0-md | |
`` | |
A simple paragraph. | |
``) | |
(doc-parse-original case-0-md) | |
# => | |
@[[["A" 1] ["simple" 6] ["paragraph." 10]]] | |
(def case-0a-md | |
`` | |
* A | |
``) | |
(doc-parse-original case-0a-md) | |
# => | |
@[@[:ul @[[["A" 1]]]]] | |
(def case-1-md | |
`` | |
* A | |
* B | |
* C | |
``) | |
# XXX: undesirable result | |
(doc-parse-original case-1-md) | |
# => | |
@[@[:ul | |
@[[["A" 1]]] | |
@[[["B" 1]]]] | |
@[:ul | |
@[[["C" 1]]]]] | |
(def case-2-md | |
`` | |
* A | |
* B | |
* C | |
``) | |
# XXX: undesirable result | |
(doc-parse-original case-2-md) | |
# => | |
@[@[:ul | |
@[[["A" 1]]] | |
@[[["B" 1]]]] | |
@[:ul | |
@[[["C" 1]]]]] | |
(def case-3-md | |
`` | |
* A | |
* B | |
* B.1 | |
* B.2 | |
* C | |
``) | |
# XXX: undesirable result | |
(doc-parse-original case-3-md) | |
# => | |
@[@[:ul | |
@[[["A" 1]]] | |
@[[["B" 1]] | |
@[:ul | |
@[[["B.1" 3]]] | |
@[[["B.2" 3]]]]]] | |
@[:ul | |
@[[["C" 1]]]]] | |
(def case-4-md | |
`` | |
* A | |
* B | |
* B.1 | |
* B.2 | |
* B.3 | |
* C | |
``) | |
# XXX: undesirable result | |
(doc-parse-original case-4-md) | |
# => | |
@[@[:ul | |
@[[["A" 1]]] | |
@[[["B" 1]] | |
@[:ul | |
@[[["B.1" 3]]] | |
@[[["B.2" 3]]]]]] | |
@[:ul | |
@[[["B.3" 3]]]] | |
@[:ul | |
@[[["C" 1]]]]] | |
(def case-5-md | |
`` | |
* A | |
* B | |
* B.1 | |
* B.2 Hi there | |
There is some text here | |
* B.3 | |
* C | |
``) | |
# XXX: undesirable result | |
(doc-parse-original case-5-md) | |
# => | |
@[@[:ul | |
@[[["A" 1]]] | |
@[[["B" 1]] | |
@[:ul | |
@[[["B.1" 3]]] | |
@[[["B.2" 3] ["Hi" 2] ["there" 5] ["There" 5] ["is" 2] | |
["some" 4] ["text" 4] ["here" 4]]]]]] | |
@[:ul | |
@[[["B.3" 3]]]] | |
@[:ul | |
@[[["C" 1]]]]] | |
) | |
(comment | |
(def str | |
(string | |
"012345678\n" | |
" \n" | |
"012345678\n" | |
"\n")) | |
(var cursor 0) | |
# just for test expressions | |
(defn c [] (get str cursor)) | |
(defn cn [n] (get str (+ n cursor))) | |
(defn blank-line? [] | |
(var i 0) | |
(var blank? false) | |
(while (def yi (cn i)) | |
(when (= yi (chr "\n")) (set blank? true) (break)) | |
(when (not= yi (chr " ")) (break)) | |
(++ i)) | |
(if blank? (inc i) false)) | |
(and (set cursor 0) | |
[(c) (blank-line?)]) | |
# => | |
[(chr "0") false] | |
(and (set cursor 10) | |
[(c) (blank-line?)]) | |
# => | |
[(chr " ") 10] | |
(and (set cursor 30) | |
[(c) (blank-line?)]) | |
# => | |
[(chr "\n") 1] | |
) | |
# XXX: this version at least doesn't break up things into multiple | |
# lists unnecessarily. might want to apply the blank-line? | |
# function developed in the comment block above as a refinement? | |
# may not be needed...not sure. | |
(defn doc-parse | |
`Extracted from doc-format` | |
[str &opt colorize] | |
(def has-color (if (not= nil colorize) | |
colorize | |
(dyn *doc-color*))) | |
# Terminal codes for emission/tokenization | |
(def delimiters | |
(if has-color | |
{:underline ["\e[4m" "\e[24m"] | |
:code ["\e[97m" "\e[39m"] | |
:italics ["\e[4m" "\e[24m"] | |
:bold ["\e[1m" "\e[22m"]} | |
{:underline ["_" "_"] | |
:code ["`" "`"] | |
:italics ["*" "*"] | |
:bold ["**" "**"]})) | |
(def modes @{}) | |
(defn toggle-mode [mode] | |
(def active (get modes mode)) | |
(def delims (get delimiters mode)) | |
(put modes mode (not active)) | |
(delims (if active 1 0))) | |
# Parse state | |
(var cursor 0) # indexes into string for parsing | |
(var stack @[]) # return value for this block. | |
# Traversal helpers | |
(defn c [] (get str cursor)) | |
(defn cn [n] (get str (+ n cursor))) | |
(defn c++ [] (let [ret (get str cursor)] (++ cursor) ret)) | |
(defn c+=n [n] (let [ret (get str cursor)] (+= cursor n) ret)) | |
# skip* functions return number of characters matched and advance the cursor. | |
(defn skipwhite [] | |
(def x cursor) | |
(while (= (c) (chr " ")) (++ cursor)) | |
(- cursor x)) | |
(defn skipline [] | |
(def x cursor) | |
(while (let [y (c)] (and y (not= y (chr "\n")))) (++ cursor)) | |
(c++) | |
(- cursor x)) | |
# Detection helpers - return number of characters matched | |
(defn ul? [] | |
(let [x (c) x1 (cn 1)] | |
(and | |
(= x1 (chr " ")) | |
(or (= x (chr "*")) (= x (chr "-"))) | |
2))) | |
(defn ol? [] | |
(def old cursor) | |
(while (and (>= (c) (chr "0")) (<= (c) (chr "9"))) (c++)) | |
(let [c1 (c) c2 (cn 1) c* cursor] | |
(set cursor old) | |
(if (and (= c1 (chr ".")) (= c2 (chr " "))) | |
(- c* cursor -2)))) | |
(defn fcb? [] (if (= (chr "`") (c) (cn 1) (cn 2)) 3)) | |
(defn nl? [] (if (= (chr "\n") (c)) 1)) | |
(defn blank-line? [] | |
(var i 0) | |
(var blank? false) | |
(while (def yi (cn i)) | |
(when (= yi (chr "\n")) (set blank? true) (break)) | |
(when (not= yi (chr " ")) (break)) | |
(++ i)) | |
(if blank? (inc i) false)) | |
# Parse helper | |
# parse-* functions push nodes to `stack`, and return | |
# the indentation they leave the cursor on. | |
(var parse-blocks nil) # mutual recursion | |
(defn getslice [from to] | |
(def to (min to (length str))) | |
(string/slice str from to)) | |
(defn push [x] (array/push stack x)) | |
(defn parse-list [bullet-check initial indent] | |
(def temp-stack @[initial]) | |
(def old-stack stack) | |
(set stack temp-stack) | |
(var current-indent indent) | |
(while (and (c) (>= current-indent indent)) | |
(if (blank-line?) | |
(skipline) | |
(do | |
(def item-indent | |
(when-let [x (bullet-check)] | |
(c+=n x) | |
(+ indent (skipwhite) x))) | |
(unless item-indent | |
(set current-indent (skipwhite)) | |
(break)) | |
(def item-stack @[]) | |
(set stack item-stack) | |
(set current-indent (parse-blocks item-indent)) | |
(set stack temp-stack) | |
(push item-stack)))) | |
(set stack old-stack) | |
(push temp-stack) | |
current-indent) | |
(defn add-codeblock [indent start end] | |
(def replace-chunk (string "\n" (string/repeat " " indent))) | |
(push @[:cb (string/replace-all replace-chunk "\n" (getslice start end))]) | |
(skipline) | |
(skipwhite)) | |
(defn parse-fcb [indent] | |
(c+=n 3) | |
(skipline) | |
(c+=n indent) | |
(def start cursor) | |
(var end cursor) | |
(while (c) | |
(if (fcb?) (break)) | |
(skipline) | |
(set end cursor) | |
(skipwhite)) | |
(add-codeblock indent start end)) | |
(defn parse-icb [indent] | |
(var current-indent indent) | |
(def start cursor) | |
(var end cursor) | |
(while (c) | |
(skipline) | |
(set end cursor) | |
(set current-indent (skipwhite)) | |
(if (< current-indent indent) (break))) | |
(add-codeblock indent start end)) | |
(defn tokenize-line [line] | |
(def tokens @[]) | |
(def token @"") | |
(var token-length 0) | |
(defn delim [mode] | |
(def d (toggle-mode mode)) | |
(if-not has-color (+= token-length (length d))) | |
(buffer/push token d)) | |
(defn endtoken [] | |
(if (first token) (array/push tokens [(string token) token-length])) | |
(buffer/clear token) | |
(set token-length 0)) | |
(forv i 0 (length line) | |
(def b (get line i)) | |
(cond | |
(or (= b (chr "\n")) (= b (chr " "))) (endtoken) | |
(= b (chr "`")) (delim :code) | |
(not (modes :code)) | |
(cond | |
(= b (chr `\`)) (do | |
(++ token-length) | |
(buffer/push token (get line (++ i)))) | |
(= b (chr "_")) (delim :underline) | |
(= b (chr "*")) | |
(if (= (chr "*") (get line (+ i 1))) | |
(do (++ i) | |
(delim :bold)) | |
(delim :italics)) | |
(do (++ token-length) (buffer/push token b))) | |
(do (++ token-length) (buffer/push token b)))) | |
(endtoken) | |
(tuple/slice tokens)) | |
(set | |
parse-blocks | |
(fn parse-blocks [indent] | |
(var new-indent indent) | |
(var p-start nil) | |
(var p-end nil) | |
(defn p-line [] | |
(unless p-start | |
(set p-start cursor)) | |
(skipline) | |
(set p-end cursor) | |
(set new-indent (skipwhite))) | |
(defn finish-p [] | |
(when (and p-start (> p-end p-start)) | |
(push (tokenize-line (getslice p-start p-end))) | |
(set p-start nil))) | |
(while (and (c) (or (nl?) | |
(>= new-indent indent))) | |
(cond | |
(nl?) (do (finish-p) (c++) (set new-indent (skipwhite))) | |
(ul?) (do (finish-p) (set new-indent (parse-list ul? :ul new-indent))) | |
(ol?) (do (finish-p) (set new-indent (parse-list ol? :ol new-indent))) | |
(fcb?) (do (finish-p) (set new-indent (parse-fcb new-indent))) | |
(>= new-indent (+ 4 indent)) (do (finish-p) (set new-indent (parse-icb new-indent))) | |
(p-line))) | |
(finish-p) | |
new-indent)) | |
# Handle first line specially for defn, defmacro, etc. | |
(when (= (chr "(") (in str 0)) | |
(skipline) | |
(def first-line (string/slice str 0 (- cursor 1))) | |
(def fl-open (if has-color "\e[97m" "")) | |
(def fl-close (if has-color "\e[39m" "")) | |
(push [[(string fl-open first-line fl-close) (length first-line)]])) | |
(parse-blocks 0) | |
stack) | |
(comment | |
(def case-1-md | |
`` | |
* A | |
* B | |
* C | |
``) | |
(doc-parse case-1-md) | |
# => | |
@[@[:ul | |
@[[["A" 1]]] | |
@[[["B" 1]]] | |
@[[["C" 1]]]]] | |
(def case-1a-md | |
(string | |
"* A\n" | |
"* B\n" | |
" \n" | |
"* C")) | |
(doc-parse case-1a-md) | |
# => | |
@[@[:ul | |
@[[["A" 1]]] | |
@[[["B" 1]]] | |
@[[["C" 1]]]]] | |
(def case-2-md | |
`` | |
* A | |
* B | |
* C | |
``) | |
(doc-parse case-2-md) | |
# => | |
@[@[:ul | |
@[[["A" 1]]] | |
@[[["B" 1]]] | |
@[[["C" 1]]]]] | |
(def case-2a-md | |
(string | |
"* A\n" | |
"* B\n" | |
"\n" | |
" \n" | |
"* C")) | |
(doc-parse case-2a-md) | |
# => | |
@[@[:ul | |
@[[["A" 1]]] | |
@[[["B" 1]]] | |
@[[["C" 1]]]]] | |
(def case-3-md | |
`` | |
* A | |
* B | |
* B.1 | |
* B.2 | |
* C | |
``) | |
(doc-parse case-3-md) | |
# => | |
@[@[:ul | |
@[[["A" 1]]] | |
@[[["B" 1]] | |
@[:ul | |
@[[["B.1" 3]]] | |
@[[["B.2" 3]]]]] | |
@[[["C" 1]]]]] | |
(def case-4-md | |
`` | |
* A | |
* B | |
* B.1 | |
* B.2 | |
* B.3 | |
* C | |
``) | |
(doc-parse case-4-md) | |
# => | |
@[@[:ul | |
@[[["A" 1]]] | |
@[[["B" 1]] | |
@[:ul | |
@[[["B.1" 3]]] | |
@[[["B.2" 3]]] | |
@[[["B.3" 3]]]]] | |
@[[["C" 1]]]]] | |
(def case-5-md | |
`` | |
* A | |
* B | |
* B.1 | |
* B.2 Hi there | |
There is some text here | |
* B.3 | |
* C | |
``) | |
(doc-parse case-5-md) | |
# => | |
@[@[:ul | |
@[[["A" 1]]] | |
@[[["B" 1]] | |
@[:ul | |
@[[["B.1" 3]]] | |
@[[["B.2" 3] ["Hi" 2] ["there" 5] ["There" 5] ["is" 2] | |
["some" 4] ["text" 4] ["here" 4]]] | |
@[[["B.3" 3]]]]] | |
@[[["C" 1]]]]] | |
) | |
(defn doc-parse | |
`Extracted from doc-format` | |
[str &opt colorize] | |
(def has-color (if (not= nil colorize) | |
colorize | |
(dyn *doc-color*))) | |
# Terminal codes for emission/tokenization | |
(def delimiters | |
(if has-color | |
{:underline ["\e[4m" "\e[24m"] | |
:code ["\e[97m" "\e[39m"] | |
:italics ["\e[4m" "\e[24m"] | |
:bold ["\e[1m" "\e[22m"]} | |
{:underline ["_" "_"] | |
:code ["`" "`"] | |
:italics ["*" "*"] | |
:bold ["**" "**"]})) | |
(def modes @{}) | |
(defn toggle-mode [mode] | |
(def active (get modes mode)) | |
(def delims (get delimiters mode)) | |
(put modes mode (not active)) | |
(delims (if active 1 0))) | |
# Parse state | |
(var cursor 0) # indexes into string for parsing | |
(var stack @[]) # return value for this block. | |
# Traversal helpers | |
(defn c [] (get str cursor)) | |
(defn cn [n] (get str (+ n cursor))) | |
(defn c++ [] (let [ret (get str cursor)] (++ cursor) ret)) | |
(defn c+=n [n] (let [ret (get str cursor)] (+= cursor n) ret)) | |
# skip* functions return number of characters matched and advance the cursor. | |
(defn skipwhite [] | |
(def x cursor) | |
(while (= (c) (chr " ")) (++ cursor)) | |
(- cursor x)) | |
(defn skipline [] | |
(def x cursor) | |
(while (let [y (c)] (and y (not= y (chr "\n")))) (++ cursor)) | |
(c++) | |
(- cursor x)) | |
# Detection helpers - return number of characters matched | |
(defn ul? [] | |
(let [x (c) x1 (cn 1)] | |
(and | |
(= x1 (chr " ")) | |
(or (= x (chr "*")) (= x (chr "-"))) | |
2))) | |
(defn ol? [] | |
(def old cursor) | |
(while (and (>= (c) (chr "0")) (<= (c) (chr "9"))) (c++)) | |
(let [c1 (c) c2 (cn 1) c* cursor] | |
(set cursor old) | |
(if (and (= c1 (chr ".")) (= c2 (chr " "))) | |
(- c* cursor -2)))) | |
(defn fcb? [] (if (= (chr "`") (c) (cn 1) (cn 2)) 3)) | |
(defn nl? [] (if (= (chr "\n") (c)) 1)) | |
# Parse helper | |
# parse-* functions push nodes to `stack`, and return | |
# the indentation they leave the cursor on. | |
(var parse-blocks nil) # mutual recursion | |
(defn getslice [from to] | |
(def to (min to (length str))) | |
(string/slice str from to)) | |
(defn push [x] (array/push stack x)) | |
(defn show-to-eol | |
[marker] | |
(pp [:cursor cursor | |
:dump (string/slice str cursor | |
(string/find "\n" str cursor)) | |
marker])) | |
(defn parse-list [bullet-check initial indent] | |
(pp :parse-list) | |
(def temp-stack @[initial]) | |
(def old-stack stack) | |
(set stack temp-stack) | |
(var current-indent indent) | |
(var loose? false) | |
(while (and (c) (>= current-indent indent)) | |
(show-to-eol :parse-list-while) | |
(def item-indent | |
(when-let [x (bullet-check)] | |
(c+=n x) | |
(+ indent (skipwhite) x))) | |
(unless item-indent | |
(set current-indent (skipwhite)) | |
(break)) | |
(def item-stack @[]) | |
(set stack item-stack) | |
(pp [:entering-parse-blocks-from-parse-list]) | |
(set current-indent (parse-blocks item-indent)) | |
(pp [:returning-from-parse-blocks-to-parse-list]) | |
(when (has-value? stack :loose) | |
(pp [:filtered (filter |(not= :loose $) stack)]) | |
(pp [:found-loose-in stack]) | |
(pp [:temp-stack temp-stack]) | |
(pp [:item-stack item-stack]) | |
(put temp-stack 0 (keyword (first temp-stack) "-loose"))) | |
(set stack temp-stack) | |
(push (filter |(not= :loose $) item-stack)) | |
(pp [:bottom-of-parse-list-while | |
:current-indent current-indent | |
:indent indent])) | |
(pp :after-parse-list-while) | |
(set stack old-stack) | |
(push temp-stack) | |
current-indent) | |
(defn add-codeblock [indent start end] | |
(def replace-chunk (string "\n" (string/repeat " " indent))) | |
(push @[:cb (string/replace-all replace-chunk "\n" (getslice start end))]) | |
(skipline) | |
(skipwhite)) | |
(defn parse-fcb [indent] | |
(c+=n 3) | |
(skipline) | |
(c+=n indent) | |
(def start cursor) | |
(var end cursor) | |
(while (c) | |
(if (fcb?) (break)) | |
(skipline) | |
(set end cursor) | |
(skipwhite)) | |
(add-codeblock indent start end)) | |
(defn parse-icb [indent] | |
(var current-indent indent) | |
(def start cursor) | |
(var end cursor) | |
(while (c) | |
(skipline) | |
(set end cursor) | |
(set current-indent (skipwhite)) | |
(if (< current-indent indent) (break))) | |
(add-codeblock indent start end)) | |
(defn tokenize-line [line] | |
(def tokens @[]) | |
(def token @"") | |
(var token-length 0) | |
(defn delim [mode] | |
(def d (toggle-mode mode)) | |
(if-not has-color (+= token-length (length d))) | |
(buffer/push token d)) | |
(defn endtoken [] | |
(if (first token) (array/push tokens [(string token) token-length])) | |
(buffer/clear token) | |
(set token-length 0)) | |
(forv i 0 (length line) | |
(def b (get line i)) | |
(cond | |
(or (= b (chr "\n")) (= b (chr " "))) (endtoken) | |
(= b (chr "`")) (delim :code) | |
(not (modes :code)) | |
(cond | |
(= b (chr `\`)) (do | |
(++ token-length) | |
(buffer/push token (get line (++ i)))) | |
(= b (chr "_")) (delim :underline) | |
(= b (chr "*")) | |
(if (= (chr "*") (get line (+ i 1))) | |
(do (++ i) | |
(delim :bold)) | |
(delim :italics)) | |
(do (++ token-length) (buffer/push token b))) | |
(do (++ token-length) (buffer/push token b)))) | |
(endtoken) | |
(tuple/slice tokens)) | |
(set | |
parse-blocks | |
(fn parse-blocks [indent] | |
(pp :parse-blocks) | |
(var new-indent indent) | |
(var p-start nil) | |
(var p-end nil) | |
(defn p-line [] | |
(unless p-start | |
(set p-start cursor)) | |
(skipline) | |
(set p-end cursor) | |
(set new-indent (skipwhite))) | |
(defn finish-p [] | |
(when (and p-start (> p-end p-start)) | |
(push (tokenize-line (getslice p-start p-end))) | |
(set p-start nil))) | |
(while (and (c) (or (nl?) | |
(>= new-indent indent))) | |
(show-to-eol :parse-blocks-while) | |
(cond | |
(nl?) (do (pp :nl-branch) (push :loose) (finish-p) (c++) (set new-indent (skipwhite))) | |
(ul?) (do (pp :ul-branch) (finish-p) (set new-indent (parse-list ul? :ul new-indent))) | |
(ol?) (do (pp :ol-branch) (finish-p) (set new-indent (parse-list ol? :ol new-indent))) | |
(fcb?) (do (finish-p) (set new-indent (parse-fcb new-indent))) | |
(>= new-indent (+ 4 indent)) (do (finish-p) (set new-indent (parse-icb new-indent))) | |
(do (pp :default-p-line) (p-line))) | |
(pp [:bottom-of-parse-blocks-while | |
:new-indent new-indent | |
:indent indent])) | |
(pp :after-parse-blocks-while) | |
(finish-p) | |
new-indent)) | |
# Handle first line specially for defn, defmacro, etc. | |
(when (= (chr "(") (in str 0)) | |
(skipline) | |
(def first-line (string/slice str 0 (- cursor 1))) | |
(def fl-open (if has-color "\e[97m" "")) | |
(def fl-close (if has-color "\e[39m" "")) | |
(push [[(string fl-open first-line fl-close) (length first-line)]])) | |
(parse-blocks 0) | |
stack) | |
(comment | |
(def case-1-md | |
`` | |
* A | |
* B | |
* C | |
``) | |
(doc-parse case-1-md) | |
# => | |
@[@[:ul-loose | |
@[[["A" 1]]] | |
@[[["B" 1]]] | |
@[[["C" 1]]]]] | |
(def case-1a-md | |
(string | |
"* A\n" | |
"* B\n" | |
" \n" | |
"* C")) | |
(doc-parse case-1a-md) | |
# => | |
@[@[:ul-loose | |
@[[["A" 1]]] | |
@[[["B" 1]]] | |
@[[["C" 1]]]]] | |
(def case-2-md | |
`` | |
* A | |
* B | |
* C | |
``) | |
(doc-parse case-2-md) | |
# => | |
@[@[:ul-loose | |
@[[["A" 1]]] | |
@[[["B" 1]]] | |
@[[["C" 1]]]]] | |
(def case-2a-md | |
(string | |
"* A\n" | |
"* B\n" | |
"\n" | |
" \n" | |
"* C")) | |
(doc-parse case-2a-md) | |
# => | |
@[@[:ul-loose | |
@[[["A" 1]]] | |
@[[["B" 1]]] | |
@[[["C" 1]]]]] | |
(def case-3-md | |
`` | |
* A | |
* B | |
* B.1 | |
* B.2 | |
* C | |
``) | |
(doc-parse case-3-md) | |
# => | |
@[@[:ul | |
@[[["A" 1]]] | |
@[[["B" 1]] | |
@[:ul-loose | |
@[[["B.1" 3]]] | |
@[[["B.2" 3]]]]] | |
@[[["C" 1]]]]] | |
(def case-4-md | |
`` | |
* A | |
* B | |
* B.1 | |
* B.2 | |
* B.3 | |
* C | |
``) | |
(doc-parse case-4-md) | |
# => | |
@[@[:ul | |
@[[["A" 1]]] | |
@[[["B" 1]] | |
@[:ul-loose | |
@[[["B.1" 3]]] | |
@[[["B.2" 3]]] | |
@[[["B.3" 3]]]]] | |
@[[["C" 1]]]]] | |
(def case-5-md | |
`` | |
* A | |
* B | |
* B.1 | |
* B.2 Hi there | |
There is some text here | |
* B.3 | |
* C | |
``) | |
(doc-parse case-5-md) | |
# => | |
@[@[:ul | |
@[[["A" 1]]] | |
@[[["B" 1]] | |
@[:ul-loose | |
@[[["B.1" 3]]] | |
@[[["B.2" 3] ["Hi" 2] ["there" 5] ["There" 5] ["is" 2] | |
["some" 4] ["text" 4] ["here" 4]]] | |
@[[["B.3" 3]]]]] | |
@[[["C" 1]]]]] | |
) | |
(defn doc-emit | |
[stack &opt width indent colorize] | |
(default indent 4) | |
(def max-width (- (or width (dyn *doc-width* 80)) 8)) | |
(def has-color (if (not= nil colorize) | |
colorize | |
(dyn *doc-color*))) | |
# Emission state | |
(def buf @"") | |
(var current-column 0) | |
# Emission | |
(defn emit-indent [indent] | |
(def delta (- indent current-column)) | |
(when (< 0 delta) | |
(buffer/push buf (string/repeat " " delta)) | |
(set current-column indent))) | |
(defn emit-nl [&opt indent] | |
(buffer/push buf "\n") | |
(set current-column 0)) | |
(defn emit-word [word indent &opt len] | |
(def last-byte (last buf)) | |
(when (and | |
last-byte | |
(not= last-byte (chr "\n")) | |
(not= last-byte (chr " "))) | |
(buffer/push buf " ") | |
(++ current-column)) | |
(default len (length word)) | |
(when (and indent (> (+ 1 current-column len) max-width)) | |
(emit-nl) | |
(emit-indent indent)) | |
(buffer/push buf word) | |
(+= current-column len)) | |
(defn emit-code | |
[code indent] | |
(def replacement (string "\n" (string/repeat " " (+ 4 indent)))) | |
(emit-indent (+ 4 indent)) | |
(buffer/push buf (string/replace-all "\n" replacement code)) | |
(if (= (chr "\n") (last code)) | |
(set current-column 0) | |
(emit-nl))) | |
(defn emit-node | |
[el indent] | |
(emit-indent indent) | |
(if (tuple? el) | |
(let [rep (string "\n" (string/repeat " " indent))] | |
(each [word len] el | |
(emit-word | |
(string/replace-all "\n" rep word) | |
indent | |
len)) | |
(emit-nl)) | |
(case (first el) | |
:ul (for i 1 (length el) | |
(if (> i 1) (emit-indent indent)) | |
(emit-word "* " nil) | |
(each subel (get el i) (emit-node subel (+ 2 indent)))) | |
:ul-loose | |
(for i 1 (length el) | |
(if (> i 1) (emit-indent indent)) | |
(emit-word "* " nil) | |
(each subel (get el i) (emit-node subel (+ 2 indent)) (emit-nl))) | |
:ol (for i 1 (length el) | |
(if (> i 1) (emit-indent indent)) | |
(def lab (string/format "%d. " i)) | |
(emit-word lab nil) | |
(each subel (get el i) (emit-node subel (+ (length lab) indent)))) | |
:ol-loose | |
(for i 1 (length el) | |
(if (> i 1) (emit-indent indent)) | |
(def lab (string/format "%d. " i)) | |
(emit-word lab nil) | |
(each subel (get el i) (emit-node subel (+ (length lab) indent)) (emit-nl))) | |
:cb (emit-code (get el 1) indent)))) | |
(each el stack | |
(emit-nl) | |
(emit-node el indent)) | |
buf) | |
(comment | |
(def case-0-md | |
`` | |
* A | |
* B | |
* C | |
``) | |
(doc-emit (doc-parse case-0-md) 80 0) | |
# => | |
(buffer | |
"\n" | |
"* A\n" | |
"* B\n" | |
"* C\n") | |
(def case-1-md | |
`` | |
* A | |
* B | |
* C | |
``) | |
(doc-emit (doc-parse case-1-md)) | |
# => | |
(buffer | |
"\n" | |
" * A\n" | |
"\n" | |
" * B\n" | |
"\n" | |
" * C\n" | |
"\n") | |
(def case-2-md | |
`` | |
1. A | |
2. B | |
3. C | |
``) | |
(doc-emit (doc-parse case-2-md) 80 0) | |
# => | |
(buffer | |
"\n" | |
"1. A\n" | |
"\n" | |
"2. B\n" | |
"\n" | |
"3. C\n" | |
"\n") | |
) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment