Skip to content

Instantly share code, notes, and snippets.

@sogaiu
Last active May 7, 2025 05:13
Show Gist options
  • Save sogaiu/31b7ddcc1eab945d27ddb7e08aa28654 to your computer and use it in GitHub Desktop.
Save sogaiu/31b7ddcc1eab945d27ddb7e08aa28654 to your computer and use it in GitHub Desktop.
experimenting with doc-parse extracted (and slightly modified) from doc-format
# for comparing with version being explored below
(defn doc-parse-original
`Extracted from doc-format`
[str &opt colorize]
(def has-color (if (not= nil colorize)
colorize
(dyn *doc-color*)))
# Terminal codes for emission/tokenization
(def delimiters
(if has-color
{:underline ["\e[4m" "\e[24m"]
:code ["\e[97m" "\e[39m"]
:italics ["\e[4m" "\e[24m"]
:bold ["\e[1m" "\e[22m"]}
{:underline ["_" "_"]
:code ["`" "`"]
:italics ["*" "*"]
:bold ["**" "**"]}))
(def modes @{})
(defn toggle-mode [mode]
(def active (get modes mode))
(def delims (get delimiters mode))
(put modes mode (not active))
(delims (if active 1 0)))
# Parse state
(var cursor 0) # indexes into string for parsing
(var stack @[]) # return value for this block.
# Traversal helpers
(defn c [] (get str cursor))
(defn cn [n] (get str (+ n cursor)))
(defn c++ [] (let [ret (get str cursor)] (++ cursor) ret))
(defn c+=n [n] (let [ret (get str cursor)] (+= cursor n) ret))
# skip* functions return number of characters matched and advance the cursor.
(defn skipwhite []
(def x cursor)
(while (= (c) (chr " ")) (++ cursor))
(- cursor x))
(defn skipline []
(def x cursor)
(while (let [y (c)] (and y (not= y (chr "\n")))) (++ cursor))
(c++)
(- cursor x))
# Detection helpers - return number of characters matched
(defn ul? []
(let [x (c) x1 (cn 1)]
(and
(= x1 (chr " "))
(or (= x (chr "*")) (= x (chr "-")))
2)))
(defn ol? []
(def old cursor)
(while (and (>= (c) (chr "0")) (<= (c) (chr "9"))) (c++))
(let [c1 (c) c2 (cn 1) c* cursor]
(set cursor old)
(if (and (= c1 (chr ".")) (= c2 (chr " ")))
(- c* cursor -2))))
(defn fcb? [] (if (= (chr "`") (c) (cn 1) (cn 2)) 3))
(defn nl? [] (= (chr "\n") (c)))
# Parse helper
# parse-* functions push nodes to `stack`, and return
# the indentation they leave the cursor on.
(var parse-blocks nil) # mutual recursion
(defn getslice [from to]
(def to (min to (length str)))
(string/slice str from to))
(defn push [x] (array/push stack x))
(defn parse-list [bullet-check initial indent]
(def temp-stack @[initial])
(def old-stack stack)
(set stack temp-stack)
(var current-indent indent)
(while (and (c) (>= current-indent indent))
(def item-indent
(when-let [x (bullet-check)]
(c+=n x)
(+ indent (skipwhite) x)))
(unless item-indent
(set current-indent (skipwhite))
(break))
(def item-stack @[])
(set stack item-stack)
(set current-indent (parse-blocks item-indent))
(set stack temp-stack)
(push item-stack))
(set stack old-stack)
(push temp-stack)
current-indent)
(defn add-codeblock [indent start end]
(def replace-chunk (string "\n" (string/repeat " " indent)))
(push @[:cb (string/replace-all replace-chunk "\n" (getslice start end))])
(skipline)
(skipwhite))
(defn parse-fcb [indent]
(c+=n 3)
(skipline)
(c+=n indent)
(def start cursor)
(var end cursor)
(while (c)
(if (fcb?) (break))
(skipline)
(set end cursor)
(skipwhite))
(add-codeblock indent start end))
(defn parse-icb [indent]
(var current-indent indent)
(def start cursor)
(var end cursor)
(while (c)
(skipline)
(set end cursor)
(set current-indent (skipwhite))
(if (< current-indent indent) (break)))
(add-codeblock indent start end))
(defn tokenize-line [line]
(def tokens @[])
(def token @"")
(var token-length 0)
(defn delim [mode]
(def d (toggle-mode mode))
(if-not has-color (+= token-length (length d)))
(buffer/push token d))
(defn endtoken []
(if (first token) (array/push tokens [(string token) token-length]))
(buffer/clear token)
(set token-length 0))
(forv i 0 (length line)
(def b (get line i))
(cond
(or (= b (chr "\n")) (= b (chr " "))) (endtoken)
(= b (chr "`")) (delim :code)
(not (modes :code))
(cond
(= b (chr `\`)) (do
(++ token-length)
(buffer/push token (get line (++ i))))
(= b (chr "_")) (delim :underline)
(= b (chr "*"))
(if (= (chr "*") (get line (+ i 1)))
(do (++ i)
(delim :bold))
(delim :italics))
(do (++ token-length) (buffer/push token b)))
(do (++ token-length) (buffer/push token b))))
(endtoken)
(tuple/slice tokens))
(set
parse-blocks
(fn parse-blocks [indent]
(var new-indent indent)
(var p-start nil)
(var p-end nil)
(defn p-line []
(unless p-start
(set p-start cursor))
(skipline)
(set p-end cursor)
(set new-indent (skipwhite)))
(defn finish-p []
(when (and p-start (> p-end p-start))
(push (tokenize-line (getslice p-start p-end)))
(set p-start nil)))
(while (and (c) (>= new-indent indent))
(cond
(nl?) (do (finish-p) (c++) (set new-indent (skipwhite)))
(ul?) (do (finish-p) (set new-indent (parse-list ul? :ul new-indent)))
(ol?) (do (finish-p) (set new-indent (parse-list ol? :ol new-indent)))
(fcb?) (do (finish-p) (set new-indent (parse-fcb new-indent)))
(>= new-indent (+ 4 indent)) (do (finish-p) (set new-indent (parse-icb new-indent)))
(p-line)))
(finish-p)
new-indent))
# Handle first line specially for defn, defmacro, etc.
(when (= (chr "(") (in str 0))
(skipline)
(def first-line (string/slice str 0 (- cursor 1)))
(def fl-open (if has-color "\e[97m" ""))
(def fl-close (if has-color "\e[39m" ""))
(push [[(string fl-open first-line fl-close) (length first-line)]]))
(parse-blocks 0)
stack)
(comment
(def case-0-md
``
A simple paragraph.
``)
(doc-parse-original case-0-md)
# =>
@[[["A" 1] ["simple" 6] ["paragraph." 10]]]
(def case-0a-md
``
* A
``)
(doc-parse-original case-0a-md)
# =>
@[@[:ul @[[["A" 1]]]]]
(def case-1-md
``
* A
* B
* C
``)
# XXX: undesirable result
(doc-parse-original case-1-md)
# =>
@[@[:ul
@[[["A" 1]]]
@[[["B" 1]]]]
@[:ul
@[[["C" 1]]]]]
(def case-2-md
``
* A
* B
* C
``)
# XXX: undesirable result
(doc-parse-original case-2-md)
# =>
@[@[:ul
@[[["A" 1]]]
@[[["B" 1]]]]
@[:ul
@[[["C" 1]]]]]
(def case-3-md
``
* A
* B
* B.1
* B.2
* C
``)
# XXX: undesirable result
(doc-parse-original case-3-md)
# =>
@[@[:ul
@[[["A" 1]]]
@[[["B" 1]]
@[:ul
@[[["B.1" 3]]]
@[[["B.2" 3]]]]]]
@[:ul
@[[["C" 1]]]]]
(def case-4-md
``
* A
* B
* B.1
* B.2
* B.3
* C
``)
# XXX: undesirable result
(doc-parse-original case-4-md)
# =>
@[@[:ul
@[[["A" 1]]]
@[[["B" 1]]
@[:ul
@[[["B.1" 3]]]
@[[["B.2" 3]]]]]]
@[:ul
@[[["B.3" 3]]]]
@[:ul
@[[["C" 1]]]]]
(def case-5-md
``
* A
* B
* B.1
* B.2 Hi there
There is some text here
* B.3
* C
``)
# XXX: undesirable result
(doc-parse-original case-5-md)
# =>
@[@[:ul
@[[["A" 1]]]
@[[["B" 1]]
@[:ul
@[[["B.1" 3]]]
@[[["B.2" 3] ["Hi" 2] ["there" 5] ["There" 5] ["is" 2]
["some" 4] ["text" 4] ["here" 4]]]]]]
@[:ul
@[[["B.3" 3]]]]
@[:ul
@[[["C" 1]]]]]
)
(comment
(def str
(string
"012345678\n"
" \n"
"012345678\n"
"\n"))
(var cursor 0)
# just for test expressions
(defn c [] (get str cursor))
(defn cn [n] (get str (+ n cursor)))
(defn blank-line? []
(var i 0)
(var blank? false)
(while (def yi (cn i))
(when (= yi (chr "\n")) (set blank? true) (break))
(when (not= yi (chr " ")) (break))
(++ i))
(if blank? (inc i) false))
(and (set cursor 0)
[(c) (blank-line?)])
# =>
[(chr "0") false]
(and (set cursor 10)
[(c) (blank-line?)])
# =>
[(chr " ") 10]
(and (set cursor 30)
[(c) (blank-line?)])
# =>
[(chr "\n") 1]
)
# XXX: this version at least doesn't break up things into multiple
# lists unnecessarily. might want to apply the blank-line?
# function developed in the comment block above as a refinement?
# may not be needed...not sure.
(defn doc-parse
`Extracted from doc-format`
[str &opt colorize]
(def has-color (if (not= nil colorize)
colorize
(dyn *doc-color*)))
# Terminal codes for emission/tokenization
(def delimiters
(if has-color
{:underline ["\e[4m" "\e[24m"]
:code ["\e[97m" "\e[39m"]
:italics ["\e[4m" "\e[24m"]
:bold ["\e[1m" "\e[22m"]}
{:underline ["_" "_"]
:code ["`" "`"]
:italics ["*" "*"]
:bold ["**" "**"]}))
(def modes @{})
(defn toggle-mode [mode]
(def active (get modes mode))
(def delims (get delimiters mode))
(put modes mode (not active))
(delims (if active 1 0)))
# Parse state
(var cursor 0) # indexes into string for parsing
(var stack @[]) # return value for this block.
# Traversal helpers
(defn c [] (get str cursor))
(defn cn [n] (get str (+ n cursor)))
(defn c++ [] (let [ret (get str cursor)] (++ cursor) ret))
(defn c+=n [n] (let [ret (get str cursor)] (+= cursor n) ret))
# skip* functions return number of characters matched and advance the cursor.
(defn skipwhite []
(def x cursor)
(while (= (c) (chr " ")) (++ cursor))
(- cursor x))
(defn skipline []
(def x cursor)
(while (let [y (c)] (and y (not= y (chr "\n")))) (++ cursor))
(c++)
(- cursor x))
# Detection helpers - return number of characters matched
(defn ul? []
(let [x (c) x1 (cn 1)]
(and
(= x1 (chr " "))
(or (= x (chr "*")) (= x (chr "-")))
2)))
(defn ol? []
(def old cursor)
(while (and (>= (c) (chr "0")) (<= (c) (chr "9"))) (c++))
(let [c1 (c) c2 (cn 1) c* cursor]
(set cursor old)
(if (and (= c1 (chr ".")) (= c2 (chr " ")))
(- c* cursor -2))))
(defn fcb? [] (if (= (chr "`") (c) (cn 1) (cn 2)) 3))
(defn nl? [] (if (= (chr "\n") (c)) 1))
(defn blank-line? []
(var i 0)
(var blank? false)
(while (def yi (cn i))
(when (= yi (chr "\n")) (set blank? true) (break))
(when (not= yi (chr " ")) (break))
(++ i))
(if blank? (inc i) false))
# Parse helper
# parse-* functions push nodes to `stack`, and return
# the indentation they leave the cursor on.
(var parse-blocks nil) # mutual recursion
(defn getslice [from to]
(def to (min to (length str)))
(string/slice str from to))
(defn push [x] (array/push stack x))
(defn parse-list [bullet-check initial indent]
(def temp-stack @[initial])
(def old-stack stack)
(set stack temp-stack)
(var current-indent indent)
(while (and (c) (>= current-indent indent))
(if (blank-line?)
(skipline)
(do
(def item-indent
(when-let [x (bullet-check)]
(c+=n x)
(+ indent (skipwhite) x)))
(unless item-indent
(set current-indent (skipwhite))
(break))
(def item-stack @[])
(set stack item-stack)
(set current-indent (parse-blocks item-indent))
(set stack temp-stack)
(push item-stack))))
(set stack old-stack)
(push temp-stack)
current-indent)
(defn add-codeblock [indent start end]
(def replace-chunk (string "\n" (string/repeat " " indent)))
(push @[:cb (string/replace-all replace-chunk "\n" (getslice start end))])
(skipline)
(skipwhite))
(defn parse-fcb [indent]
(c+=n 3)
(skipline)
(c+=n indent)
(def start cursor)
(var end cursor)
(while (c)
(if (fcb?) (break))
(skipline)
(set end cursor)
(skipwhite))
(add-codeblock indent start end))
(defn parse-icb [indent]
(var current-indent indent)
(def start cursor)
(var end cursor)
(while (c)
(skipline)
(set end cursor)
(set current-indent (skipwhite))
(if (< current-indent indent) (break)))
(add-codeblock indent start end))
(defn tokenize-line [line]
(def tokens @[])
(def token @"")
(var token-length 0)
(defn delim [mode]
(def d (toggle-mode mode))
(if-not has-color (+= token-length (length d)))
(buffer/push token d))
(defn endtoken []
(if (first token) (array/push tokens [(string token) token-length]))
(buffer/clear token)
(set token-length 0))
(forv i 0 (length line)
(def b (get line i))
(cond
(or (= b (chr "\n")) (= b (chr " "))) (endtoken)
(= b (chr "`")) (delim :code)
(not (modes :code))
(cond
(= b (chr `\`)) (do
(++ token-length)
(buffer/push token (get line (++ i))))
(= b (chr "_")) (delim :underline)
(= b (chr "*"))
(if (= (chr "*") (get line (+ i 1)))
(do (++ i)
(delim :bold))
(delim :italics))
(do (++ token-length) (buffer/push token b)))
(do (++ token-length) (buffer/push token b))))
(endtoken)
(tuple/slice tokens))
(set
parse-blocks
(fn parse-blocks [indent]
(var new-indent indent)
(var p-start nil)
(var p-end nil)
(defn p-line []
(unless p-start
(set p-start cursor))
(skipline)
(set p-end cursor)
(set new-indent (skipwhite)))
(defn finish-p []
(when (and p-start (> p-end p-start))
(push (tokenize-line (getslice p-start p-end)))
(set p-start nil)))
(while (and (c) (or (nl?)
(>= new-indent indent)))
(cond
(nl?) (do (finish-p) (c++) (set new-indent (skipwhite)))
(ul?) (do (finish-p) (set new-indent (parse-list ul? :ul new-indent)))
(ol?) (do (finish-p) (set new-indent (parse-list ol? :ol new-indent)))
(fcb?) (do (finish-p) (set new-indent (parse-fcb new-indent)))
(>= new-indent (+ 4 indent)) (do (finish-p) (set new-indent (parse-icb new-indent)))
(p-line)))
(finish-p)
new-indent))
# Handle first line specially for defn, defmacro, etc.
(when (= (chr "(") (in str 0))
(skipline)
(def first-line (string/slice str 0 (- cursor 1)))
(def fl-open (if has-color "\e[97m" ""))
(def fl-close (if has-color "\e[39m" ""))
(push [[(string fl-open first-line fl-close) (length first-line)]]))
(parse-blocks 0)
stack)
(comment
(def case-1-md
``
* A
* B
* C
``)
(doc-parse case-1-md)
# =>
@[@[:ul
@[[["A" 1]]]
@[[["B" 1]]]
@[[["C" 1]]]]]
(def case-1a-md
(string
"* A\n"
"* B\n"
" \n"
"* C"))
(doc-parse case-1a-md)
# =>
@[@[:ul
@[[["A" 1]]]
@[[["B" 1]]]
@[[["C" 1]]]]]
(def case-2-md
``
* A
* B
* C
``)
(doc-parse case-2-md)
# =>
@[@[:ul
@[[["A" 1]]]
@[[["B" 1]]]
@[[["C" 1]]]]]
(def case-2a-md
(string
"* A\n"
"* B\n"
"\n"
" \n"
"* C"))
(doc-parse case-2a-md)
# =>
@[@[:ul
@[[["A" 1]]]
@[[["B" 1]]]
@[[["C" 1]]]]]
(def case-3-md
``
* A
* B
* B.1
* B.2
* C
``)
(doc-parse case-3-md)
# =>
@[@[:ul
@[[["A" 1]]]
@[[["B" 1]]
@[:ul
@[[["B.1" 3]]]
@[[["B.2" 3]]]]]
@[[["C" 1]]]]]
(def case-4-md
``
* A
* B
* B.1
* B.2
* B.3
* C
``)
(doc-parse case-4-md)
# =>
@[@[:ul
@[[["A" 1]]]
@[[["B" 1]]
@[:ul
@[[["B.1" 3]]]
@[[["B.2" 3]]]
@[[["B.3" 3]]]]]
@[[["C" 1]]]]]
(def case-5-md
``
* A
* B
* B.1
* B.2 Hi there
There is some text here
* B.3
* C
``)
(doc-parse case-5-md)
# =>
@[@[:ul
@[[["A" 1]]]
@[[["B" 1]]
@[:ul
@[[["B.1" 3]]]
@[[["B.2" 3] ["Hi" 2] ["there" 5] ["There" 5] ["is" 2]
["some" 4] ["text" 4] ["here" 4]]]
@[[["B.3" 3]]]]]
@[[["C" 1]]]]]
)
(defn doc-parse
`Extracted from doc-format`
[str &opt colorize]
(def has-color (if (not= nil colorize)
colorize
(dyn *doc-color*)))
# Terminal codes for emission/tokenization
(def delimiters
(if has-color
{:underline ["\e[4m" "\e[24m"]
:code ["\e[97m" "\e[39m"]
:italics ["\e[4m" "\e[24m"]
:bold ["\e[1m" "\e[22m"]}
{:underline ["_" "_"]
:code ["`" "`"]
:italics ["*" "*"]
:bold ["**" "**"]}))
(def modes @{})
(defn toggle-mode [mode]
(def active (get modes mode))
(def delims (get delimiters mode))
(put modes mode (not active))
(delims (if active 1 0)))
# Parse state
(var cursor 0) # indexes into string for parsing
(var stack @[]) # return value for this block.
# Traversal helpers
(defn c [] (get str cursor))
(defn cn [n] (get str (+ n cursor)))
(defn c++ [] (let [ret (get str cursor)] (++ cursor) ret))
(defn c+=n [n] (let [ret (get str cursor)] (+= cursor n) ret))
# skip* functions return number of characters matched and advance the cursor.
(defn skipwhite []
(def x cursor)
(while (= (c) (chr " ")) (++ cursor))
(- cursor x))
(defn skipline []
(def x cursor)
(while (let [y (c)] (and y (not= y (chr "\n")))) (++ cursor))
(c++)
(- cursor x))
# Detection helpers - return number of characters matched
(defn ul? []
(let [x (c) x1 (cn 1)]
(and
(= x1 (chr " "))
(or (= x (chr "*")) (= x (chr "-")))
2)))
(defn ol? []
(def old cursor)
(while (and (>= (c) (chr "0")) (<= (c) (chr "9"))) (c++))
(let [c1 (c) c2 (cn 1) c* cursor]
(set cursor old)
(if (and (= c1 (chr ".")) (= c2 (chr " ")))
(- c* cursor -2))))
(defn fcb? [] (if (= (chr "`") (c) (cn 1) (cn 2)) 3))
(defn nl? [] (if (= (chr "\n") (c)) 1))
# Parse helper
# parse-* functions push nodes to `stack`, and return
# the indentation they leave the cursor on.
(var parse-blocks nil) # mutual recursion
(defn getslice [from to]
(def to (min to (length str)))
(string/slice str from to))
(defn push [x] (array/push stack x))
(defn show-to-eol
[marker]
(pp [:cursor cursor
:dump (string/slice str cursor
(string/find "\n" str cursor))
marker]))
(defn parse-list [bullet-check initial indent]
(pp :parse-list)
(def temp-stack @[initial])
(def old-stack stack)
(set stack temp-stack)
(var current-indent indent)
(var loose? false)
(while (and (c) (>= current-indent indent))
(show-to-eol :parse-list-while)
(def item-indent
(when-let [x (bullet-check)]
(c+=n x)
(+ indent (skipwhite) x)))
(unless item-indent
(set current-indent (skipwhite))
(break))
(def item-stack @[])
(set stack item-stack)
(pp [:entering-parse-blocks-from-parse-list])
(set current-indent (parse-blocks item-indent))
(pp [:returning-from-parse-blocks-to-parse-list])
(when (has-value? stack :loose)
(pp [:filtered (filter |(not= :loose $) stack)])
(pp [:found-loose-in stack])
(pp [:temp-stack temp-stack])
(pp [:item-stack item-stack])
(put temp-stack 0 (keyword (first temp-stack) "-loose")))
(set stack temp-stack)
(push (filter |(not= :loose $) item-stack))
(pp [:bottom-of-parse-list-while
:current-indent current-indent
:indent indent]))
(pp :after-parse-list-while)
(set stack old-stack)
(push temp-stack)
current-indent)
(defn add-codeblock [indent start end]
(def replace-chunk (string "\n" (string/repeat " " indent)))
(push @[:cb (string/replace-all replace-chunk "\n" (getslice start end))])
(skipline)
(skipwhite))
(defn parse-fcb [indent]
(c+=n 3)
(skipline)
(c+=n indent)
(def start cursor)
(var end cursor)
(while (c)
(if (fcb?) (break))
(skipline)
(set end cursor)
(skipwhite))
(add-codeblock indent start end))
(defn parse-icb [indent]
(var current-indent indent)
(def start cursor)
(var end cursor)
(while (c)
(skipline)
(set end cursor)
(set current-indent (skipwhite))
(if (< current-indent indent) (break)))
(add-codeblock indent start end))
(defn tokenize-line [line]
(def tokens @[])
(def token @"")
(var token-length 0)
(defn delim [mode]
(def d (toggle-mode mode))
(if-not has-color (+= token-length (length d)))
(buffer/push token d))
(defn endtoken []
(if (first token) (array/push tokens [(string token) token-length]))
(buffer/clear token)
(set token-length 0))
(forv i 0 (length line)
(def b (get line i))
(cond
(or (= b (chr "\n")) (= b (chr " "))) (endtoken)
(= b (chr "`")) (delim :code)
(not (modes :code))
(cond
(= b (chr `\`)) (do
(++ token-length)
(buffer/push token (get line (++ i))))
(= b (chr "_")) (delim :underline)
(= b (chr "*"))
(if (= (chr "*") (get line (+ i 1)))
(do (++ i)
(delim :bold))
(delim :italics))
(do (++ token-length) (buffer/push token b)))
(do (++ token-length) (buffer/push token b))))
(endtoken)
(tuple/slice tokens))
(set
parse-blocks
(fn parse-blocks [indent]
(pp :parse-blocks)
(var new-indent indent)
(var p-start nil)
(var p-end nil)
(defn p-line []
(unless p-start
(set p-start cursor))
(skipline)
(set p-end cursor)
(set new-indent (skipwhite)))
(defn finish-p []
(when (and p-start (> p-end p-start))
(push (tokenize-line (getslice p-start p-end)))
(set p-start nil)))
(while (and (c) (or (nl?)
(>= new-indent indent)))
(show-to-eol :parse-blocks-while)
(cond
(nl?) (do (pp :nl-branch) (push :loose) (finish-p) (c++) (set new-indent (skipwhite)))
(ul?) (do (pp :ul-branch) (finish-p) (set new-indent (parse-list ul? :ul new-indent)))
(ol?) (do (pp :ol-branch) (finish-p) (set new-indent (parse-list ol? :ol new-indent)))
(fcb?) (do (finish-p) (set new-indent (parse-fcb new-indent)))
(>= new-indent (+ 4 indent)) (do (finish-p) (set new-indent (parse-icb new-indent)))
(do (pp :default-p-line) (p-line)))
(pp [:bottom-of-parse-blocks-while
:new-indent new-indent
:indent indent]))
(pp :after-parse-blocks-while)
(finish-p)
new-indent))
# Handle first line specially for defn, defmacro, etc.
(when (= (chr "(") (in str 0))
(skipline)
(def first-line (string/slice str 0 (- cursor 1)))
(def fl-open (if has-color "\e[97m" ""))
(def fl-close (if has-color "\e[39m" ""))
(push [[(string fl-open first-line fl-close) (length first-line)]]))
(parse-blocks 0)
stack)
(comment
(def case-1-md
``
* A
* B
* C
``)
(doc-parse case-1-md)
# =>
@[@[:ul-loose
@[[["A" 1]]]
@[[["B" 1]]]
@[[["C" 1]]]]]
(def case-1a-md
(string
"* A\n"
"* B\n"
" \n"
"* C"))
(doc-parse case-1a-md)
# =>
@[@[:ul-loose
@[[["A" 1]]]
@[[["B" 1]]]
@[[["C" 1]]]]]
(def case-2-md
``
* A
* B
* C
``)
(doc-parse case-2-md)
# =>
@[@[:ul-loose
@[[["A" 1]]]
@[[["B" 1]]]
@[[["C" 1]]]]]
(def case-2a-md
(string
"* A\n"
"* B\n"
"\n"
" \n"
"* C"))
(doc-parse case-2a-md)
# =>
@[@[:ul-loose
@[[["A" 1]]]
@[[["B" 1]]]
@[[["C" 1]]]]]
(def case-3-md
``
* A
* B
* B.1
* B.2
* C
``)
(doc-parse case-3-md)
# =>
@[@[:ul
@[[["A" 1]]]
@[[["B" 1]]
@[:ul-loose
@[[["B.1" 3]]]
@[[["B.2" 3]]]]]
@[[["C" 1]]]]]
(def case-4-md
``
* A
* B
* B.1
* B.2
* B.3
* C
``)
(doc-parse case-4-md)
# =>
@[@[:ul
@[[["A" 1]]]
@[[["B" 1]]
@[:ul-loose
@[[["B.1" 3]]]
@[[["B.2" 3]]]
@[[["B.3" 3]]]]]
@[[["C" 1]]]]]
(def case-5-md
``
* A
* B
* B.1
* B.2 Hi there
There is some text here
* B.3
* C
``)
(doc-parse case-5-md)
# =>
@[@[:ul
@[[["A" 1]]]
@[[["B" 1]]
@[:ul-loose
@[[["B.1" 3]]]
@[[["B.2" 3] ["Hi" 2] ["there" 5] ["There" 5] ["is" 2]
["some" 4] ["text" 4] ["here" 4]]]
@[[["B.3" 3]]]]]
@[[["C" 1]]]]]
)
(defn doc-emit
[stack &opt width indent colorize]
(default indent 4)
(def max-width (- (or width (dyn *doc-width* 80)) 8))
(def has-color (if (not= nil colorize)
colorize
(dyn *doc-color*)))
# Emission state
(def buf @"")
(var current-column 0)
# Emission
(defn emit-indent [indent]
(def delta (- indent current-column))
(when (< 0 delta)
(buffer/push buf (string/repeat " " delta))
(set current-column indent)))
(defn emit-nl [&opt indent]
(buffer/push buf "\n")
(set current-column 0))
(defn emit-word [word indent &opt len]
(def last-byte (last buf))
(when (and
last-byte
(not= last-byte (chr "\n"))
(not= last-byte (chr " ")))
(buffer/push buf " ")
(++ current-column))
(default len (length word))
(when (and indent (> (+ 1 current-column len) max-width))
(emit-nl)
(emit-indent indent))
(buffer/push buf word)
(+= current-column len))
(defn emit-code
[code indent]
(def replacement (string "\n" (string/repeat " " (+ 4 indent))))
(emit-indent (+ 4 indent))
(buffer/push buf (string/replace-all "\n" replacement code))
(if (= (chr "\n") (last code))
(set current-column 0)
(emit-nl)))
(defn emit-node
[el indent]
(emit-indent indent)
(if (tuple? el)
(let [rep (string "\n" (string/repeat " " indent))]
(each [word len] el
(emit-word
(string/replace-all "\n" rep word)
indent
len))
(emit-nl))
(case (first el)
:ul (for i 1 (length el)
(if (> i 1) (emit-indent indent))
(emit-word "* " nil)
(each subel (get el i) (emit-node subel (+ 2 indent))))
:ul-loose
(for i 1 (length el)
(if (> i 1) (emit-indent indent))
(emit-word "* " nil)
(each subel (get el i) (emit-node subel (+ 2 indent)) (emit-nl)))
:ol (for i 1 (length el)
(if (> i 1) (emit-indent indent))
(def lab (string/format "%d. " i))
(emit-word lab nil)
(each subel (get el i) (emit-node subel (+ (length lab) indent))))
:ol-loose
(for i 1 (length el)
(if (> i 1) (emit-indent indent))
(def lab (string/format "%d. " i))
(emit-word lab nil)
(each subel (get el i) (emit-node subel (+ (length lab) indent)) (emit-nl)))
:cb (emit-code (get el 1) indent))))
(each el stack
(emit-nl)
(emit-node el indent))
buf)
(comment
(def case-0-md
``
* A
* B
* C
``)
(doc-emit (doc-parse case-0-md) 80 0)
# =>
(buffer
"\n"
"* A\n"
"* B\n"
"* C\n")
(def case-1-md
``
* A
* B
* C
``)
(doc-emit (doc-parse case-1-md))
# =>
(buffer
"\n"
" * A\n"
"\n"
" * B\n"
"\n"
" * C\n"
"\n")
(def case-2-md
``
1. A
2. B
3. C
``)
(doc-emit (doc-parse case-2-md) 80 0)
# =>
(buffer
"\n"
"1. A\n"
"\n"
"2. B\n"
"\n"
"3. C\n"
"\n")
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment