fix sxml processing

This commit is contained in:
xenia 2021-06-10 02:04:38 -04:00
parent a56149b5bf
commit 1116e9fc9c
2 changed files with 32 additions and 14 deletions

View File

@ -1,9 +1,9 @@
#lang racket/base
(require racket/date racket/match racket/string
net/url-string)
(require racket/date racket/match racket/pretty racket/string
net/url-string xml)
(provide get-date parse-url get-user-agent-header)
(provide get-date parse-url get-user-agent-header sxml->xexpr check-xexpr)
(define (get-date)
(parameterize ([date-display-format 'rfc2822])
@ -25,3 +25,31 @@
(define (get-user-agent-header)
(format "User-Agent: Racket/~a (net/http-client) static-generator/0.1 trans/rights" (version)))
(define (sxml->xexpr sxml)
(define (sxml-attr->xexpr-attr attr)
(match attr
[(list (? symbol?) (? string?)) attr]
[(list (? symbol? s)) (list s (symbol->string s))]
[_ (error "invalid sxml attr" attr)]))
(match sxml
[(? string?) sxml]
['(& nbsp) "\u00a0"]
[(list '& _) (error "unhandled &-code" sxml)]
[(list tag (cons '@ attrs) body ...)
(cons tag (cons (map sxml-attr->xexpr-attr attrs) (map sxml->xexpr body)))]
[(list tag body ...)
(cons tag (cons '() (map sxml->xexpr body)))]))
(define (check-xexpr x)
(define (check-detailed-xexpr x)
(match x
[(? string?) (void)]
[(list tag (list (list (? symbol?) (? string?)) ...) body ...)
(void (map check-detailed-xexpr body))]
[_ (displayln "problem:") (pretty-write x)]))
(unless (xexpr? x)
(check-detailed-xexpr x)
(error "not xexpr!"))
x)

View File

@ -85,16 +85,6 @@
body]
[(list _ _ children ...) (ormap masto-extract-embed-content children)]))
(define (sxml->xexpr sxml)
(match sxml
[(? string?) sxml]
['(& nbsp) "\u00a0"]
[(list '& _) (error "unhandled &-code" sxml)]
[(list tag (cons '@ attrs) body ...)
(cons tag (cons attrs (map sxml->xexpr body)))]
[(list tag body ...)
(cons tag (cons '() (map sxml->xexpr body)))]))
(define (transform-links base xexpr)
(match xexpr
[(list 'a attrs body ...)
@ -123,7 +113,7 @@
(~>>
(apply list 'article '([class "masto-embed"])
(~>> body html->xexp masto-extract-embed-content (map sxml->xexpr)))
(transform-links parsed-url)))
(transform-links parsed-url) check-xexpr))
(module+ main
(require markdown/display-xexpr racket/pretty)