implement more markdown postprocessing

This commit is contained in:
xenia 2021-06-07 05:31:39 -04:00
parent 6d24b2fbd8
commit 7244b75526
1 changed files with 49 additions and 36 deletions

View File

@ -1,6 +1,6 @@
#lang racket/base
(require racket/list racket/match racket/port racket/set racket/string
(require racket/list racket/match racket/port racket/pretty racket/set racket/string
markdown markdown/display-xexpr markdown/toc
"compiler.rkt" "defs.rkt"
threading
@ -10,7 +10,7 @@
(prefix-in article: "templates/article.html.rkt"))
(struct input-doc [metadata text] #:transparent)
(struct ir-doc [metadata hashtags html] #:transparent)
(struct ir-doc [metadata html] #:transparent)
(define (read-input-doc [port (current-input-port)])
(define metadata (read port))
@ -19,9 +19,13 @@
(define text (port->string port))
(input-doc metadata text))
;; transforms the xexpr into an xexpr that includes links to hashtags
(define (extract-hashtags xexpr)
(define hashtags (mutable-set))
(define *xref-char* "^")
(define *user-char* "@")
(define *hashtag-char* "#")
;; transforms the xexpr into an xexpr that includes links to entities
(define (extract-text-starting-with xexpr start-char tag-name)
(define re (pregexp (string-append start-char "(\\p{L}|\\p{N})+")))
(define (process+ xexpr)
(match xexpr
;; do not descend into code or math
@ -31,57 +35,66 @@
(list (cons tag (cons attrs
(apply append
(map (lambda (child) (process+ child)) children)))))]
;; extract and transform hashtags
;; extract and transform the prefixed text
[(? string? str)
(define posns (regexp-match-positions* #px"#(\\p{L}|\\p{N})+" str))
(define posns (regexp-match-positions* re str))
(define-values [items last-pos]
(for/fold ([items '()] [last-pos 0]) ([pos (in-list posns)])
(define hashtag (substring str (car pos) (cdr pos)))
(set-add! hashtags hashtag)
(values (cons `(hashtag () ,hashtag)
(define value (substring str (car pos) (cdr pos)))
(values (cons `(,tag-name ([target ,(substring value 1)]) ,value)
(cons (substring str last-pos (car pos)) items))
(cdr pos))))
(reverse (cons (substring str last-pos) items))]))
(values (first (process+ xexpr)) hashtags))
(first (process+ xexpr)))
(define (extract-text-starting-with/many xexprs start-char tag-name)
(map (λ (xexpr) (extract-text-starting-with xexpr start-char tag-name)) xexprs))
;; xref links start with ^
(define (xref-link? x)
(and (string? x) (> (string-length x) 1) (char=? (string-ref x 0) #\^)))
;; extracts xref-prefixed links and converts them into the <xref> pseudo-tag
(define (extract-xrefs xexpr)
(match xexpr
[(list 'a attrs body ...)
(match (assoc 'href attrs)
[(list _ (? xref-link? xref))
(define target (substring xref 1))
`(xref ([target ,target]) ,@(map extract-xrefs body))]
[_ (apply list 'a attrs (map extract-xrefs body))])]
[(list tag attrs children ...)
(apply list tag attrs (map extract-xrefs children))]
[(? string? str) str]))
;; extracts prefixed links and converts them into the <xref> pseudo-tag
(define (extract-prefixed-links xexpr start-char tag-name)
(define (pred? s)
(and (string? s) (> (string-length s) 1) (string=? start-char (substring s 0 1))))
(define (extract-prefixed-links+ xexpr)
(match xexpr
[(list 'a attrs body ...)
(match (assoc 'href attrs)
[(list _ (? pred? value))
(define target (substring value 1))
`(,tag-name ([target ,target]) ,@(map extract-prefixed-links+ body))]
[_ (apply list 'a attrs (map extract-prefixed-links+ body))])]
[(list tag attrs children ...)
(apply list tag attrs (map extract-prefixed-links+ children))]
[(? string? str) str]))
(extract-prefixed-links+ xexpr))
(define (extract-prefixed-links/many xexprs start-char tag-name)
(map (λ (xexpr) (extract-prefixed-links xexpr start-char tag-name)) xexprs))
(define (input-doc->ir-doc doc)
(match-define (input-doc md text) doc)
(define output-raw (parse-markdown text))
(define output-cooked (mathml:transform-xexprs (syntax:transform-xexprs output-raw)))
(let-values ([(output-cooked hashtags) (extract-hashtags output-cooked)])
(let ([output-cooked (extract-xrefs output-cooked)])
(ir-doc md hashtags output-cooked))))
(define output-cooked
(~> output-raw syntax:transform-xexprs mathml:transform-xexprs
(extract-text-starting-with/many *hashtag-char* 'hashtag)
(extract-text-starting-with/many *user-char* 'user)
(extract-prefixed-links/many *xref-char* 'xref)
(extract-prefixed-links/many *user-char* 'user)))
(ir-doc md output-cooked))
;; lowers pseudo-tags into their final form
(define (lower-specials xexpr base-url)
(define (lower-specials* xexpr)
(match xexpr
;; hashtags
[(list 'hashtag _ hashtag)
`(a ([class "hashtag"]
[href ,(string-append base-url "/tag/" (substring hashtag 1))])
,hashtag)]
[(list 'hashtag (list (list 'target target)) body ...)
`(a ([class "hashtag"] [href ,(string-append base-url "/hashtag/" target)])
,@(map lower-specials* body))]
;; xrefs
[(list 'xref (list (list 'target target)) body ...)
`(a ([class "xref"] [href ,(string-append base-url "/xref/" target)])
,@(map lower-specials* body))]
;; users
[(list 'user (list (list 'target target)) body ...)
`(a ([class "user"] [href ,(string-append base-url "/user/" target)])
,@(map lower-specials* body))]
;; everything else
[(list tag attrs children ...)
(apply list tag attrs (map lower-specials* children))]
@ -89,7 +102,7 @@
(lower-specials* xexpr))
(define (ir-doc->page doc)
(match-define (ir-doc md hashtags content) doc)
(match-define (ir-doc md content) doc)
(let ([content (lower-specials content "https://awoo.systems")])
(define content-toc (toc content))