From e3fefc7af4ba106ba9c7e5906ca79a00eae55ef5 Mon Sep 17 00:00:00 2001 From: haskal Date: Sat, 11 Apr 2020 01:09:26 -0400 Subject: [PATCH] Fix some mishandling of headers to make cookiejar work --- private/util.rkt | 2 +- smol-http/http-msg.rkt | 37 +++++++++++++++++++++++++------------ smol-http/main.rkt | 4 +++- webcrawler | 18 ++++++++++++------ 4 files changed, 41 insertions(+), 20 deletions(-) diff --git a/private/util.rkt b/private/util.rkt index 60167a5..adb77fa 100644 --- a/private/util.rkt +++ b/private/util.rkt @@ -60,7 +60,7 @@ (cj-cache jar) (cons 'cookie (string-join - (for/list ([(k v) (in-hash jar)]) + (for/list ([(k v) (in-hash (cj-jar jar))]) (format "~a=~a" k v)) "; ")))))) ;; CookieJar -> (cons Symbol String) diff --git a/smol-http/http-msg.rkt b/smol-http/http-msg.rkt index b5865d1..3b5677f 100644 --- a/smol-http/http-msg.rkt +++ b/smol-http/http-msg.rkt @@ -67,9 +67,7 @@ ;; hdrs : (listof (cons/c symbol? any?)) (define (http-add-headers msg hdrs) (struct-copy http-msg msg - [headers (append (http-msg-headers msg) - (for/list ([kv (in-list hdrs)]) - (cons (car kv) (->bytes (cdr kv)))))])) + [headers (append (http-msg-headers msg) hdrs)])) ;; (http-set-body msg how) -> http-msg? ;; msg : http-msg? @@ -82,6 +80,20 @@ (http-add-headers (struct-copy http-msg msg [body bdy*]) (http-body-additional-headers bdy*))) +;; We need an unchecked (no added headers) version because otherwise when parsing responses we add +;; duplicate headers +;; I was too lazy to abstract this i'm sorry milo ~ 🦈 + +;; (http-set-body/unchecked msg how) -> http-msg? +;; msg : http-msg? +;; how : (or/c http-body? (http-body? . -> . http-body?)) +(define (http-set-body/unchecked msg how) + (define bdy* + (match how + [(? http-body? bdy*) bdy*] + [(? procedure? f) (f (http-msg-body msg))])) + (struct-copy http-msg msg [body bdy*])) + ;; (write-http-msg msg [port]) -> void? ;; msg : http-msg? ;; port : output-port? @@ -162,7 +174,7 @@ (define (http-body-additional-headers bdy) (match bdy [#f '()] - [(? bytes? bs) `([content-length . ,(bytes-length bs)])] + [(? bytes? bs) `([content-length . ,(number->string (bytes-length bs))])] [(? stream?) '([transfer-encoding . #"chunked"])])) ;; --------------------------------------------------------------------------------------- @@ -184,8 +196,7 @@ (define (string->header-kv s) (match (regexp-match #px"^([^:]+):\\s*(.*)$" s) - [(list _ k v) (cons (string->symbol (string-downcase k)) - (->bytes v))] + [(list _ k v) (cons (string->symbol (string-downcase k)) v)] [_ #f])) (define (read-crlf-line port) @@ -210,15 +221,15 @@ (define bdy (read-http-body (http-msg-header msg 'transfer-encoding) (http-msg-header msg 'content-length) port)) - (http-set-body msg bdy)) + (http-set-body/unchecked msg bdy)) ;; (http-body-reader tenc clen [port]) -> http-body? ;; tenc, clen : (or/c #f bytes?) ;; port : input-port? (define (read-http-body tenc clen [port (current-input-port)]) - (define len (and clen (string->int (bytes->string/utf-8 clen)))) + (define len (and clen (string->int clen))) (cond - [(equal? tenc #"chunked") (read-chunked-stream port)] + [(equal? tenc "chunked") (read-chunked-stream port)] [(number? len) (read-bytes len port)] [(and (not tenc) (not len)) #f] [else (raise-read-http-error "read-http-body: not sure how to read HTTP body")])) @@ -226,6 +237,8 @@ ;; input-port? -> [streamof bytes?] (define (read-chunked-stream port) (define len (string->number (read-crlf-line port))) + (unless (integer? len) + (raise-read-http-error "read-chunked-stream: invalid chunked encoding detected")) (define data (read-bytes len port)) (match-define #"\r\n" (read-bytes 2 port)) (if (zero? len) @@ -250,9 +263,9 @@ #"Content-Length: 5\r\n" #"\r\n" #"hello" - (http-set-body (make-http-res 200 '([content-type . "text/html"] - [content-length . 5])) - #"hello")) + (http-set-body/unchecked (make-http-res 200 '([content-type . "text/html"] + [content-length . "5"])) + #"hello")) (let ([msg (read-http-msg (open-input-bytes (bytes-append #"HTTP/1.1 200 OK\r\n" diff --git a/smol-http/main.rkt b/smol-http/main.rkt index 627e714..e3871e2 100644 --- a/smol-http/main.rkt +++ b/smol-http/main.rkt @@ -5,7 +5,9 @@ http-response-code-name http-socket? - http-connect) + http-connect + http-request + http-close) (require racket/tcp racket/match diff --git a/webcrawler b/webcrawler index 04df549..2de44ba 100755 --- a/webcrawler +++ b/webcrawler @@ -9,19 +9,25 @@ ; /_//_/ /_/\__,_/_/ /_/\__, / \__,_/ |__/|__/\__,_/ ; /____/ -(require smol-http) -(require "private/util.rkt") +(require smol-http + "private/util.rkt") +(define HOST "fring.ccs.neu.edu") +(define ROOT-PATH "/fakebook/") +(define LOGIN-PATH "/accounts/login/") (define DEFAULT-HDRS '((user-agent . "🦈 hash-lang-uwu crawler v1.0") ;; christo pls do not track thanks (dnt . "1"))) -(define req - (make-http-req 'GET "/hello-world" DEFAULT-HDRS)) - (define (run-webcrawler username password) (printf-debug "the credentials are: ~s ~s\n" username password) - (write-http-msg req)) + (define req + (make-http-req 'GET LOGIN-PATH DEFAULT-HDRS)) + (define sock (http-connect HOST #:headers DEFAULT-HDRS)) + (define rsp (http-request sock req)) + (update-cookie-jar! (current-cookie-jar) (http-msg-headers rsp)) + (displayln (current-cookie-jar)) + (http-close sock)) (module+ main (command-line