From 1116e9fc9c3d88e610067a9637f6a35d58584185 Mon Sep 17 00:00:00 2001 From: haskal Date: Thu, 10 Jun 2021 02:04:38 -0400 Subject: [PATCH] fix sxml processing --- fetch-util.rkt | 34 +++++++++++++++++++++++++++++++--- fetch.rkt | 12 +----------- 2 files changed, 32 insertions(+), 14 deletions(-) diff --git a/fetch-util.rkt b/fetch-util.rkt index f937c87..569fc64 100644 --- a/fetch-util.rkt +++ b/fetch-util.rkt @@ -1,9 +1,9 @@ #lang racket/base -(require racket/date racket/match racket/string - net/url-string) +(require racket/date racket/match racket/pretty racket/string + net/url-string xml) -(provide get-date parse-url get-user-agent-header) +(provide get-date parse-url get-user-agent-header sxml->xexpr check-xexpr) (define (get-date) (parameterize ([date-display-format 'rfc2822]) @@ -25,3 +25,31 @@ (define (get-user-agent-header) (format "User-Agent: Racket/~a (net/http-client) static-generator/0.1 trans/rights" (version))) + +(define (sxml->xexpr sxml) + (define (sxml-attr->xexpr-attr attr) + (match attr + [(list (? symbol?) (? string?)) attr] + [(list (? symbol? s)) (list s (symbol->string s))] + [_ (error "invalid sxml attr" attr)])) + (match sxml + [(? string?) sxml] + ['(& nbsp) "\u00a0"] + [(list '& _) (error "unhandled &-code" sxml)] + [(list tag (cons '@ attrs) body ...) + (cons tag (cons (map sxml-attr->xexpr-attr attrs) (map sxml->xexpr body)))] + [(list tag body ...) + (cons tag (cons '() (map sxml->xexpr body)))])) + +(define (check-xexpr x) + (define (check-detailed-xexpr x) + (match x + [(? string?) (void)] + [(list tag (list (list (? symbol?) (? string?)) ...) body ...) + (void (map check-detailed-xexpr body))] + [_ (displayln "problem:") (pretty-write x)])) + (unless (xexpr? x) + (check-detailed-xexpr x) + (error "not xexpr!")) + x) + diff --git a/fetch.rkt b/fetch.rkt index 2d626ad..74cb5a0 100644 --- a/fetch.rkt +++ b/fetch.rkt @@ -85,16 +85,6 @@ body] [(list _ _ children ...) (ormap masto-extract-embed-content children)])) -(define (sxml->xexpr sxml) - (match sxml - [(? string?) sxml] - ['(& nbsp) "\u00a0"] - [(list '& _) (error "unhandled &-code" sxml)] - [(list tag (cons '@ attrs) body ...) - (cons tag (cons attrs (map sxml->xexpr body)))] - [(list tag body ...) - (cons tag (cons '() (map sxml->xexpr body)))])) - (define (transform-links base xexpr) (match xexpr [(list 'a attrs body ...) @@ -123,7 +113,7 @@ (~>> (apply list 'article '([class "masto-embed"]) (~>> body html->xexp masto-extract-embed-content (map sxml->xexpr))) - (transform-links parsed-url))) + (transform-links parsed-url) check-xexpr)) (module+ main (require markdown/display-xexpr racket/pretty)