diff --git a/webcrawler b/webcrawler index 2de44ba..a539134 100755 --- a/webcrawler +++ b/webcrawler @@ -19,15 +19,46 @@ ;; christo pls do not track thanks (dnt . "1"))) -(define (run-webcrawler username password) - (printf-debug "the credentials are: ~s ~s\n" username password) - (define req - (make-http-req 'GET LOGIN-PATH DEFAULT-HDRS)) - (define sock (http-connect HOST #:headers DEFAULT-HDRS)) +(define (crawler-fetch/noclose req) + (define sock (http-connect HOST + #:headers (cons (cookie-jar->header (current-cookie-jar)) + DEFAULT-HDRS))) (define rsp (http-request sock req)) (update-cookie-jar! (current-cookie-jar) (http-msg-headers rsp)) - (displayln (current-cookie-jar)) - (http-close sock)) + (values rsp sock)) + +(define (crawler-fetch req) + (define-values [rsp sock] (crawler-fetch/noclose req)) + (http-close sock) + rsp) + +(define (crawler-fetch/xexpr req) + (define-values [rsp sock] (crawler-fetch/noclose req)) + (define xe (string->xexpr + (bytes->string/utf-8 + (for/fold ([res #""]) + ([chunk (in-http-body-chunks (http-msg-body rsp))]) + (bytes-append res chunk))))) + (http-close sock) + xe) + +(define (crawler-login username password) + (crawler-fetch (make-http-req 'GET LOGIN-PATH)) + (define form-body + (format "username=~a&password=~a&csrfmiddlewaretoken=~a&next=" + username password (cookie-jar-ref (current-cookie-jar) "csrftoken"))) + (crawler-fetch (http-set-body (make-http-req 'POST LOGIN-PATH) + (string->bytes/utf-8 form-body))) + (void)) + +(define (run-webcrawler username password) + (printf-debug "the credentials are: ~s ~s\n" username password) + (printf-debug "logging in...\n") + (crawler-login username password) + (printf-debug "logged in\n") + (define xe (crawler-fetch/xexpr (make-http-req 'GET ROOT-PATH))) + (displayln (find-flags xe)) + (displayln (find-hrefs xe))) (module+ main (command-line