Fix some mishandling of headers to make cookiejar work
This commit is contained in:
parent
b8f0cc5179
commit
e3fefc7af4
|
@ -60,7 +60,7 @@
|
||||||
(cj-cache jar)
|
(cj-cache jar)
|
||||||
(cons 'cookie
|
(cons 'cookie
|
||||||
(string-join
|
(string-join
|
||||||
(for/list ([(k v) (in-hash jar)])
|
(for/list ([(k v) (in-hash (cj-jar jar))])
|
||||||
(format "~a=~a" k v)) "; "))))))
|
(format "~a=~a" k v)) "; "))))))
|
||||||
|
|
||||||
;; CookieJar -> (cons Symbol String)
|
;; CookieJar -> (cons Symbol String)
|
||||||
|
|
|
@ -67,9 +67,7 @@
|
||||||
;; hdrs : (listof (cons/c symbol? any?))
|
;; hdrs : (listof (cons/c symbol? any?))
|
||||||
(define (http-add-headers msg hdrs)
|
(define (http-add-headers msg hdrs)
|
||||||
(struct-copy http-msg msg
|
(struct-copy http-msg msg
|
||||||
[headers (append (http-msg-headers msg)
|
[headers (append (http-msg-headers msg) hdrs)]))
|
||||||
(for/list ([kv (in-list hdrs)])
|
|
||||||
(cons (car kv) (->bytes (cdr kv)))))]))
|
|
||||||
|
|
||||||
;; (http-set-body msg how) -> http-msg?
|
;; (http-set-body msg how) -> http-msg?
|
||||||
;; msg : http-msg?
|
;; msg : http-msg?
|
||||||
|
@ -82,6 +80,20 @@
|
||||||
(http-add-headers (struct-copy http-msg msg [body bdy*])
|
(http-add-headers (struct-copy http-msg msg [body bdy*])
|
||||||
(http-body-additional-headers bdy*)))
|
(http-body-additional-headers bdy*)))
|
||||||
|
|
||||||
|
;; We need an unchecked (no added headers) version because otherwise when parsing responses we add
|
||||||
|
;; duplicate headers
|
||||||
|
;; I was too lazy to abstract this i'm sorry milo ~ 🦈
|
||||||
|
|
||||||
|
;; (http-set-body/unchecked msg how) -> http-msg?
|
||||||
|
;; msg : http-msg?
|
||||||
|
;; how : (or/c http-body? (http-body? . -> . http-body?))
|
||||||
|
(define (http-set-body/unchecked msg how)
|
||||||
|
(define bdy*
|
||||||
|
(match how
|
||||||
|
[(? http-body? bdy*) bdy*]
|
||||||
|
[(? procedure? f) (f (http-msg-body msg))]))
|
||||||
|
(struct-copy http-msg msg [body bdy*]))
|
||||||
|
|
||||||
;; (write-http-msg msg [port]) -> void?
|
;; (write-http-msg msg [port]) -> void?
|
||||||
;; msg : http-msg?
|
;; msg : http-msg?
|
||||||
;; port : output-port?
|
;; port : output-port?
|
||||||
|
@ -162,7 +174,7 @@
|
||||||
(define (http-body-additional-headers bdy)
|
(define (http-body-additional-headers bdy)
|
||||||
(match bdy
|
(match bdy
|
||||||
[#f '()]
|
[#f '()]
|
||||||
[(? bytes? bs) `([content-length . ,(bytes-length bs)])]
|
[(? bytes? bs) `([content-length . ,(number->string (bytes-length bs))])]
|
||||||
[(? stream?) '([transfer-encoding . #"chunked"])]))
|
[(? stream?) '([transfer-encoding . #"chunked"])]))
|
||||||
|
|
||||||
;; ---------------------------------------------------------------------------------------
|
;; ---------------------------------------------------------------------------------------
|
||||||
|
@ -184,8 +196,7 @@
|
||||||
|
|
||||||
(define (string->header-kv s)
|
(define (string->header-kv s)
|
||||||
(match (regexp-match #px"^([^:]+):\\s*(.*)$" s)
|
(match (regexp-match #px"^([^:]+):\\s*(.*)$" s)
|
||||||
[(list _ k v) (cons (string->symbol (string-downcase k))
|
[(list _ k v) (cons (string->symbol (string-downcase k)) v)]
|
||||||
(->bytes v))]
|
|
||||||
[_ #f]))
|
[_ #f]))
|
||||||
|
|
||||||
(define (read-crlf-line port)
|
(define (read-crlf-line port)
|
||||||
|
@ -210,15 +221,15 @@
|
||||||
(define bdy (read-http-body (http-msg-header msg 'transfer-encoding)
|
(define bdy (read-http-body (http-msg-header msg 'transfer-encoding)
|
||||||
(http-msg-header msg 'content-length)
|
(http-msg-header msg 'content-length)
|
||||||
port))
|
port))
|
||||||
(http-set-body msg bdy))
|
(http-set-body/unchecked msg bdy))
|
||||||
|
|
||||||
;; (http-body-reader tenc clen [port]) -> http-body?
|
;; (http-body-reader tenc clen [port]) -> http-body?
|
||||||
;; tenc, clen : (or/c #f bytes?)
|
;; tenc, clen : (or/c #f bytes?)
|
||||||
;; port : input-port?
|
;; port : input-port?
|
||||||
(define (read-http-body tenc clen [port (current-input-port)])
|
(define (read-http-body tenc clen [port (current-input-port)])
|
||||||
(define len (and clen (string->int (bytes->string/utf-8 clen))))
|
(define len (and clen (string->int clen)))
|
||||||
(cond
|
(cond
|
||||||
[(equal? tenc #"chunked") (read-chunked-stream port)]
|
[(equal? tenc "chunked") (read-chunked-stream port)]
|
||||||
[(number? len) (read-bytes len port)]
|
[(number? len) (read-bytes len port)]
|
||||||
[(and (not tenc) (not len)) #f]
|
[(and (not tenc) (not len)) #f]
|
||||||
[else (raise-read-http-error "read-http-body: not sure how to read HTTP body")]))
|
[else (raise-read-http-error "read-http-body: not sure how to read HTTP body")]))
|
||||||
|
@ -226,6 +237,8 @@
|
||||||
;; input-port? -> [streamof bytes?]
|
;; input-port? -> [streamof bytes?]
|
||||||
(define (read-chunked-stream port)
|
(define (read-chunked-stream port)
|
||||||
(define len (string->number (read-crlf-line port)))
|
(define len (string->number (read-crlf-line port)))
|
||||||
|
(unless (integer? len)
|
||||||
|
(raise-read-http-error "read-chunked-stream: invalid chunked encoding detected"))
|
||||||
(define data (read-bytes len port))
|
(define data (read-bytes len port))
|
||||||
(match-define #"\r\n" (read-bytes 2 port))
|
(match-define #"\r\n" (read-bytes 2 port))
|
||||||
(if (zero? len)
|
(if (zero? len)
|
||||||
|
@ -250,9 +263,9 @@
|
||||||
#"Content-Length: 5\r\n"
|
#"Content-Length: 5\r\n"
|
||||||
#"\r\n"
|
#"\r\n"
|
||||||
#"hello"
|
#"hello"
|
||||||
(http-set-body (make-http-res 200 '([content-type . "text/html"]
|
(http-set-body/unchecked (make-http-res 200 '([content-type . "text/html"]
|
||||||
[content-length . 5]))
|
[content-length . "5"]))
|
||||||
#"hello"))
|
#"hello"))
|
||||||
|
|
||||||
(let ([msg (read-http-msg
|
(let ([msg (read-http-msg
|
||||||
(open-input-bytes (bytes-append #"HTTP/1.1 200 OK\r\n"
|
(open-input-bytes (bytes-append #"HTTP/1.1 200 OK\r\n"
|
||||||
|
|
|
@ -5,7 +5,9 @@
|
||||||
http-response-code-name
|
http-response-code-name
|
||||||
|
|
||||||
http-socket?
|
http-socket?
|
||||||
http-connect)
|
http-connect
|
||||||
|
http-request
|
||||||
|
http-close)
|
||||||
|
|
||||||
(require racket/tcp
|
(require racket/tcp
|
||||||
racket/match
|
racket/match
|
||||||
|
|
18
webcrawler
18
webcrawler
|
@ -9,19 +9,25 @@
|
||||||
; /_//_/ /_/\__,_/_/ /_/\__, / \__,_/ |__/|__/\__,_/
|
; /_//_/ /_/\__,_/_/ /_/\__, / \__,_/ |__/|__/\__,_/
|
||||||
; /____/
|
; /____/
|
||||||
|
|
||||||
(require smol-http)
|
(require smol-http
|
||||||
(require "private/util.rkt")
|
"private/util.rkt")
|
||||||
|
|
||||||
|
(define HOST "fring.ccs.neu.edu")
|
||||||
|
(define ROOT-PATH "/fakebook/")
|
||||||
|
(define LOGIN-PATH "/accounts/login/")
|
||||||
(define DEFAULT-HDRS '((user-agent . "🦈 hash-lang-uwu crawler v1.0")
|
(define DEFAULT-HDRS '((user-agent . "🦈 hash-lang-uwu crawler v1.0")
|
||||||
;; christo pls do not track thanks
|
;; christo pls do not track thanks
|
||||||
(dnt . "1")))
|
(dnt . "1")))
|
||||||
|
|
||||||
(define req
|
|
||||||
(make-http-req 'GET "/hello-world" DEFAULT-HDRS))
|
|
||||||
|
|
||||||
(define (run-webcrawler username password)
|
(define (run-webcrawler username password)
|
||||||
(printf-debug "the credentials are: ~s ~s\n" username password)
|
(printf-debug "the credentials are: ~s ~s\n" username password)
|
||||||
(write-http-msg req))
|
(define req
|
||||||
|
(make-http-req 'GET LOGIN-PATH DEFAULT-HDRS))
|
||||||
|
(define sock (http-connect HOST #:headers DEFAULT-HDRS))
|
||||||
|
(define rsp (http-request sock req))
|
||||||
|
(update-cookie-jar! (current-cookie-jar) (http-msg-headers rsp))
|
||||||
|
(displayln (current-cookie-jar))
|
||||||
|
(http-close sock))
|
||||||
|
|
||||||
(module+ main
|
(module+ main
|
||||||
(command-line
|
(command-line
|
||||||
|
|
Loading…
Reference in New Issue