2020-04-10 20:31:41 +00:00
|
|
|
#!/usr/bin/env racket
|
2020-04-10 23:12:50 +00:00
|
|
|
#lang racket
|
|
|
|
; vim: ft=racket
|
2020-04-10 20:36:38 +00:00
|
|
|
|
2020-04-11 04:32:34 +00:00
|
|
|
; __ __ __
|
|
|
|
; __/ // /_/ /___ _____ ____ _ __ ___ ____ __
|
|
|
|
; /_ _ __/ / __ `/ __ \/ __ `/ / / / / | /| / / / / /
|
|
|
|
; /_ _ __/ / /_/ / / / / /_/ / / /_/ /| |/ |/ / /_/ /
|
|
|
|
; /_//_/ /_/\__,_/_/ /_/\__, / \__,_/ |__/|__/\__,_/
|
|
|
|
; /____/
|
|
|
|
|
2020-04-11 05:09:26 +00:00
|
|
|
(require smol-http
|
|
|
|
"private/util.rkt")
|
2020-04-11 03:17:20 +00:00
|
|
|
|
2020-04-11 05:09:26 +00:00
|
|
|
;; Crawl target: the server hostname connections are opened to, the path of
;; the page the crawl starts from, and the path of the login form.
(define-values (HOST ROOT-PATH LOGIN-PATH)
  (values "fring.ccs.neu.edu"
          "/fakebook/"
          "/accounts/login/"))
|
2020-04-10 23:23:14 +00:00
|
|
|
;; Headers sent on every connection the crawler opens, as an association
;; list mapping header-name symbols to string values.
(define DEFAULT-HDRS
  (list (cons 'user-agent "🦈 hash-lang-uwu crawler v1.0")
        ;; Do Not Track: ask the server not to track this client.
        (cons 'dnt "1")))
|
|
|
|
|
2020-04-11 05:46:34 +00:00
|
|
|
;; crawler-fetch/noclose : req -> (values rsp sock)
;; Opens a new connection to HOST whose headers are DEFAULT-HDRS plus one
;; header built from the current cookie jar, sends `req` over it, and feeds
;; the response headers back into the cookie jar.
;;
;; Returns the response together with the still-open connection; the caller
;; must `http-close` the connection once it is done with the response.
;;
;; If sending the request or updating the cookie jar fails, the connection is
;; closed here before the exception propagates: the caller never receives the
;; connection in that case and so could not close it itself.
(define (crawler-fetch/noclose req)
  (define sock
    (http-connect HOST
                  #:headers (cons (cookie-jar->header (current-cookie-jar))
                                  DEFAULT-HDRS)))
  (define rsp
    (with-handlers ([exn:fail? (lambda (e)
                                 (http-close sock)
                                 (raise e))])
      (let ([response (http-request sock req)])
        (update-cookie-jar! (current-cookie-jar) (http-msg-headers response))
        response)))
  (values rsp sock))
|
|
|
|
|
|
|
|
;; crawler-fetch : req -> rsp
;; Performs `req` through crawler-fetch/noclose, closes the connection
;; immediately, and returns the response.
;; NOTE(review): the connection is closed before the body is consumed, so the
;; returned response's body is presumably not readable afterwards -- confirm
;; against smol-http before relying on it.
(define (crawler-fetch req)
  (let-values ([(response conn) (crawler-fetch/noclose req)])
    (begin0 response
      (http-close conn))))
|
|
|
|
|
|
|
|
;; crawler-fetch/xexpr : req -> xexpr
;; Performs `req`, reads the whole response body, decodes it as UTF-8 and
;; parses it with `string->xexpr`.
;;
;; The connection is closed whether or not reading, decoding or parsing
;; succeeds, so a malformed page does not leak the connection.
(define (crawler-fetch/xexpr req)
  (define-values [rsp sock] (crawler-fetch/noclose req))
  (dynamic-wind
    void
    (lambda ()
      ;; Collect the chunks first and concatenate once; appending inside the
      ;; loop would recopy the accumulated body for every chunk.
      (define body-bytes
        (apply bytes-append
               (for/list ([chunk (in-http-body-chunks (http-msg-body rsp))])
                 chunk)))
      (string->xexpr (bytes->string/utf-8 body-bytes)))
    (lambda ()
      (http-close sock))))
|
|
|
|
|
|
|
|
;; crawler-login : string string -> void
;; Logs in by first GETting the login page, which lets the cookie jar pick up
;; the "csrftoken" cookie, and then POSTing the login form with that token.
;;
;; The username and password are form-urlencoded before being placed in the
;; body, so characters such as `&`, `=`, `+`, `%` or spaces in the
;; credentials cannot corrupt the form fields.
;;
;; Raises an error if no CSRF token is available after the initial GET.
(define (crawler-login username password)
  (local-require (only-in net/uri-codec form-urlencoded-encode))
  (crawler-fetch (make-http-req 'GET LOGIN-PATH))
  (define csrf-token (cookie-jar-ref (current-cookie-jar) "csrftoken"))
  ;; NOTE(review): assumes cookie-jar-ref reports a missing cookie as #f --
  ;; confirm; without this check the literal text "#f" would be submitted.
  (unless csrf-token
    (error 'crawler-login
           "no csrftoken cookie available after GET ~a"
           LOGIN-PATH))
  (define form-body
    (format "username=~a&password=~a&csrfmiddlewaretoken=~a&next="
            (form-urlencoded-encode username)
            (form-urlencoded-encode password)
            csrf-token))
  (crawler-fetch (http-set-body (make-http-req 'POST LOGIN-PATH)
                                (string->bytes/utf-8 form-body)))
  (void))
|
|
|
|
|
|
|
|
;; run-webcrawler : string string -> void
;; Logs in with the given credentials, fetches ROOT-PATH as an xexpr, and
;; prints the result of `find-flags` followed by the result of `find-hrefs`
;; for that page.
(define (run-webcrawler username password)
  ;; The password is deliberately not echoed, even in debug mode, so it does
  ;; not end up in terminal scrollback or captured logs.
  (printf-debug "the credentials are: ~s ~s\n" username "<redacted>")
  (printf-debug "logging in...\n")
  (crawler-login username password)
  (printf-debug "logged in\n")
  (define xe (crawler-fetch/xexpr (make-http-req 'GET ROOT-PATH)))
  (displayln (find-flags xe))
  (displayln (find-hrefs xe)))
|
2020-04-10 23:12:50 +00:00
|
|
|
|
|
|
|
;; Command-line entry point: webcrawler [-d] <username> <password>
;; `-d` turns on debug mode; the two positional arguments are handed to
;; run-webcrawler.
(module+ main
  ;; Parse the command line first, then start the crawl with the result.
  (define credentials
    (command-line
     #:program "webcrawler"
     #:once-each
     [("-d") "Debug mode" (debug-mode? #t)]
     #:args (username password)
     (list username password)))
  (apply run-webcrawler credentials))
|