capybara/fetch-util.rkt

56 lines
1.9 KiB
Racket
Raw Normal View History

2021-06-10 05:02:35 +00:00
#lang racket/base
2021-06-10 06:04:38 +00:00
(require racket/date racket/match racket/pretty racket/string
net/url-string xml)
2021-06-10 05:02:35 +00:00
2021-06-10 06:04:38 +00:00
(provide get-date parse-url get-user-agent-header sxml->xexpr check-xexpr)
2021-06-10 05:02:35 +00:00
(define (get-date)
(parameterize ([date-display-format 'rfc2822])
;; aaaaaaaaaaaaaaaaaaaaa
(regexp-replace
#px"[^ ]+$"
(date->string (seconds->date (* 0.001 (current-inexact-milliseconds)) #f) #t)
"GMT")))
(define (parse-url fetch-url)
(printf "fetching page: ~a\n" fetch-url)
(define parsed-url (if (string? fetch-url) (string->url fetch-url) fetch-url))
(match parsed-url
[(url scheme user host port path-absolute? path query fragment)
(define ssl? (string=? (string-downcase scheme) "https"))
(values parsed-url ssl? host
(or port (if ssl? 443 80))
(format "/~a" (string-join (map path/param-path path) "/")))]))
(define (get-user-agent-header)
(format "User-Agent: Racket/~a (net/http-client) static-generator/0.1 trans/rights" (version)))
2021-06-10 06:04:38 +00:00
(define (sxml->xexpr sxml)
(define (sxml-attr->xexpr-attr attr)
(match attr
[(list (? symbol?) (? string?)) attr]
[(list (? symbol? s)) (list s (symbol->string s))]
[_ (error "invalid sxml attr" attr)]))
(match sxml
[(? string?) sxml]
['(& nbsp) "\u00a0"]
[(list '& _) (error "unhandled &-code" sxml)]
[(list tag (cons '@ attrs) body ...)
(cons tag (cons (map sxml-attr->xexpr-attr attrs) (map sxml->xexpr body)))]
[(list tag body ...)
(cons tag (cons '() (map sxml->xexpr body)))]))
(define (check-xexpr x)
(define (check-detailed-xexpr x)
(match x
[(? string?) (void)]
[(list tag (list (list (? symbol?) (? string?)) ...) body ...)
(void (map check-detailed-xexpr body))]
[_ (displayln "problem:") (pretty-write x)]))
(unless (xexpr? x)
(check-detailed-xexpr x)
(error "not xexpr!"))
x)