Fix some mishandling of headers to make cookiejar work
This commit is contained in:
parent
b8f0cc5179
commit
e3fefc7af4
|
@ -60,7 +60,7 @@
|
|||
(cj-cache jar)
|
||||
(cons 'cookie
|
||||
(string-join
|
||||
(for/list ([(k v) (in-hash jar)])
|
||||
(for/list ([(k v) (in-hash (cj-jar jar))])
|
||||
(format "~a=~a" k v)) "; "))))))
|
||||
|
||||
;; CookieJar -> (cons Symbol String)
|
||||
|
|
|
@ -67,9 +67,7 @@
|
|||
;; hdrs : (listof (cons/c symbol? any?))
|
||||
(define (http-add-headers msg hdrs)
|
||||
(struct-copy http-msg msg
|
||||
[headers (append (http-msg-headers msg)
|
||||
(for/list ([kv (in-list hdrs)])
|
||||
(cons (car kv) (->bytes (cdr kv)))))]))
|
||||
[headers (append (http-msg-headers msg) hdrs)]))
|
||||
|
||||
;; (http-set-body msg how) -> http-msg?
|
||||
;; msg : http-msg?
|
||||
|
@ -82,6 +80,20 @@
|
|||
(http-add-headers (struct-copy http-msg msg [body bdy*])
|
||||
(http-body-additional-headers bdy*)))
|
||||
|
||||
;; We need an unchecked (no added headers) version because otherwise when parsing responses we add
|
||||
;; duplicate headers
|
||||
;; I was too lazy to abstract this i'm sorry milo ~ 🦈
|
||||
|
||||
;; (http-set-body/unchecked msg how) -> http-msg?
|
||||
;; msg : http-msg?
|
||||
;; how : (or/c http-body? (http-body? . -> . http-body?))
|
||||
(define (http-set-body/unchecked msg how)
|
||||
(define bdy*
|
||||
(match how
|
||||
[(? http-body? bdy*) bdy*]
|
||||
[(? procedure? f) (f (http-msg-body msg))]))
|
||||
(struct-copy http-msg msg [body bdy*]))
|
||||
|
||||
;; (write-http-msg msg [port]) -> void?
|
||||
;; msg : http-msg?
|
||||
;; port : output-port?
|
||||
|
@ -162,7 +174,7 @@
|
|||
(define (http-body-additional-headers bdy)
|
||||
(match bdy
|
||||
[#f '()]
|
||||
[(? bytes? bs) `([content-length . ,(bytes-length bs)])]
|
||||
[(? bytes? bs) `([content-length . ,(number->string (bytes-length bs))])]
|
||||
[(? stream?) '([transfer-encoding . #"chunked"])]))
|
||||
|
||||
;; ---------------------------------------------------------------------------------------
|
||||
|
@ -184,8 +196,7 @@
|
|||
|
||||
(define (string->header-kv s)
|
||||
(match (regexp-match #px"^([^:]+):\\s*(.*)$" s)
|
||||
[(list _ k v) (cons (string->symbol (string-downcase k))
|
||||
(->bytes v))]
|
||||
[(list _ k v) (cons (string->symbol (string-downcase k)) v)]
|
||||
[_ #f]))
|
||||
|
||||
(define (read-crlf-line port)
|
||||
|
@ -210,15 +221,15 @@
|
|||
(define bdy (read-http-body (http-msg-header msg 'transfer-encoding)
|
||||
(http-msg-header msg 'content-length)
|
||||
port))
|
||||
(http-set-body msg bdy))
|
||||
(http-set-body/unchecked msg bdy))
|
||||
|
||||
;; (http-body-reader tenc clen [port]) -> http-body?
|
||||
;; tenc, clen : (or/c #f bytes?)
|
||||
;; port : input-port?
|
||||
(define (read-http-body tenc clen [port (current-input-port)])
|
||||
(define len (and clen (string->int (bytes->string/utf-8 clen))))
|
||||
(define len (and clen (string->int clen)))
|
||||
(cond
|
||||
[(equal? tenc #"chunked") (read-chunked-stream port)]
|
||||
[(equal? tenc "chunked") (read-chunked-stream port)]
|
||||
[(number? len) (read-bytes len port)]
|
||||
[(and (not tenc) (not len)) #f]
|
||||
[else (raise-read-http-error "read-http-body: not sure how to read HTTP body")]))
|
||||
|
@ -226,6 +237,8 @@
|
|||
;; input-port? -> [streamof bytes?]
|
||||
(define (read-chunked-stream port)
|
||||
(define len (string->number (read-crlf-line port)))
|
||||
(unless (integer? len)
|
||||
(raise-read-http-error "read-chunked-stream: invalid chunked encoding detected"))
|
||||
(define data (read-bytes len port))
|
||||
(match-define #"\r\n" (read-bytes 2 port))
|
||||
(if (zero? len)
|
||||
|
@ -250,9 +263,9 @@
|
|||
#"Content-Length: 5\r\n"
|
||||
#"\r\n"
|
||||
#"hello"
|
||||
(http-set-body (make-http-res 200 '([content-type . "text/html"]
|
||||
[content-length . 5]))
|
||||
#"hello"))
|
||||
(http-set-body/unchecked (make-http-res 200 '([content-type . "text/html"]
|
||||
[content-length . "5"]))
|
||||
#"hello"))
|
||||
|
||||
(let ([msg (read-http-msg
|
||||
(open-input-bytes (bytes-append #"HTTP/1.1 200 OK\r\n"
|
||||
|
|
|
@ -5,7 +5,9 @@
|
|||
http-response-code-name
|
||||
|
||||
http-socket?
|
||||
http-connect)
|
||||
http-connect
|
||||
http-request
|
||||
http-close)
|
||||
|
||||
(require racket/tcp
|
||||
racket/match
|
||||
|
|
18
webcrawler
18
webcrawler
|
@ -9,19 +9,25 @@
|
|||
; /_//_/ /_/\__,_/_/ /_/\__, / \__,_/ |__/|__/\__,_/
|
||||
; /____/
|
||||
|
||||
(require smol-http)
|
||||
(require "private/util.rkt")
|
||||
(require smol-http
|
||||
"private/util.rkt")
|
||||
|
||||
(define HOST "fring.ccs.neu.edu")
|
||||
(define ROOT-PATH "/fakebook/")
|
||||
(define LOGIN-PATH "/accounts/login/")
|
||||
(define DEFAULT-HDRS '((user-agent . "🦈 hash-lang-uwu crawler v1.0")
|
||||
;; christo pls do not track thanks
|
||||
(dnt . "1")))
|
||||
|
||||
(define req
|
||||
(make-http-req 'GET "/hello-world" DEFAULT-HDRS))
|
||||
|
||||
(define (run-webcrawler username password)
|
||||
(printf-debug "the credentials are: ~s ~s\n" username password)
|
||||
(write-http-msg req))
|
||||
(define req
|
||||
(make-http-req 'GET LOGIN-PATH DEFAULT-HDRS))
|
||||
(define sock (http-connect HOST #:headers DEFAULT-HDRS))
|
||||
(define rsp (http-request sock req))
|
||||
(update-cookie-jar! (current-cookie-jar) (http-msg-headers rsp))
|
||||
(displayln (current-cookie-jar))
|
||||
(http-close sock))
|
||||
|
||||
(module+ main
|
||||
(command-line
|
||||
|
|
Loading…
Reference in New Issue