capybara/fetch.rkt

121 lines
4.4 KiB
Racket
Raw Normal View History

2021-06-10 05:02:35 +00:00
#lang racket/base
(require racket/function racket/list racket/match racket/port net/base64 net/http-client
net/url-string json
html-parsing crypto crypto/libcrypto threading
"fetch-util.rkt")
(provide auth-fetch/ephem masto-fetch-json masto-fetch-embed)
(define (auth-fetch/ephem fetch-url)
(parameterize ([crypto-factories (list libcrypto-factory)])
(define key (generate-private-key 'rsa '((nbits 4096))))
(define-values [parsed-url ssl? host port path]
(parse-url fetch-url))
(define date (get-date))
(define host-for-header
(cond
[(if ssl? (= port 443) (= port 80)) host]
[else (format "~a:~a" host port)]))
(define signed-header-spec "(request-target) host date content-length accept")
(define signed-content
(format "(request-target): get ~a\nhost: ~a\ndate: ~a\ncontent-length: 0\naccept: application/activity+json"
path host-for-header date))
(define signature (base64-encode
(pk-sign key (digest 'sha256 signed-content)
#:pad 'pkcs1-v1.5 #:digest 'sha256)
#""))
(define algorithm "rsa-sha256")
(define http-signature (format "keyId=\"~a\",algorithm=\"~a\",headers=\"~a\",signature=\"~a\""
"foo://example-key" algorithm signed-header-spec signature))
(displayln http-signature)
;; GET /path HTTP/1.1
(define req-headers
(list
;; host: the host with the port unless it's default
;; accept-encoding: gzip,deflate
;; content-length: (length of content) | transfer-encoding: chunked (if body is a proc)
;; connection: close
(format "Date: ~a" date)
(format "Accept: application/activity+json")
(get-user-agent-header)
(format "Signature: ~a" http-signature)))
(define-values [status resp-headers body]
(http-sendrecv host path
#:ssl? ssl?
#:port port
#:version #"1.1"
#:method #"GET"
#:headers req-headers
#:data #f
#:content-decode '(gzip deflate)))
(displayln status)
(displayln resp-headers)
(displayln (port->bytes body))
(void "TODO")))
(define (masto-fetch-json fetch-url)
(define-values [parsed-url ssl? host port path]
(parse-url fetch-url))
(define-values [status resp-headers body]
(http-sendrecv host (format "~a.json" path)
#:ssl? ssl?
#:port port
#:version #"1.1"
#:method #"GET"
#:headers (list (get-user-agent-header))
#:data #f
#:content-decode '(gzip deflate)))
(~> body port->string string->jsexpr))
(define (masto-extract-embed-content html)
(match html
['() #f]
[(? string?) #f]
[(list 'div (list '@ (list 'class "activity-stream activity-stream--headless")) body ...)
body]
[(list _ _ children ...) (ormap masto-extract-embed-content children)]))
(define (transform-links base xexpr)
(match xexpr
[(list 'a attrs body ...)
(match (assoc 'href attrs)
[#f xexpr]
[(list _ href)
(define filtered (filter (λ (x) (not (eq? (first x) 'href))) attrs))
(apply list 'a (cons (list 'href (url->string (combine-url/relative base href))) filtered)
(map (curry transform-links base) body))])]
[(list tag attrs body ...)
(apply list tag attrs (map (curry transform-links base) body))]
[(? string?) xexpr]))
(define (masto-fetch-embed fetch-url)
(define-values [parsed-url ssl? host port path]
(parse-url fetch-url))
(define-values [status resp-headers body]
(http-sendrecv host (format "~a/embed" path)
#:ssl? ssl?
#:port port
#:version #"1.1"
#:method #"GET"
#:headers (list (get-user-agent-header))
#:data #f
#:content-decode '(gzip deflate)))
(~>>
(apply list 'article '([class "masto-embed"])
(~>> body html->xexp masto-extract-embed-content (map sxml->xexpr)))
2021-06-10 06:04:38 +00:00
(transform-links parsed-url) check-xexpr))
2021-06-10 05:02:35 +00:00
(module+ main
(require markdown/display-xexpr racket/pretty)
(display-xexpr (masto-fetch-embed "https://types.pl/@haskal/106382760904406469")))