Compare commits
2 Commits
master
...
form-urlen
Author | SHA1 | Date |
---|---|---|
Milo Turner | bdcfead62c | |
Milo Turner | 8c4b525cbf |
11
Makefile
11
Makefile
|
@ -1,6 +1,6 @@
|
||||||
.PHONY: all test run clean
|
.PHONY: all test run
|
||||||
|
|
||||||
SOURCES=Makefile README.md private/util.rkt secret_flags smol-http/http-msg.rkt smol-http/info.rkt smol-http/main.rkt smol-http/util.rkt webcrawler
|
SOURCES=$(shell git ls-tree -r master --name-only)
|
||||||
CCIS_TEAM_NAME=hash-lang-uwu
|
CCIS_TEAM_NAME=hash-lang-uwu
|
||||||
CCIS_PROJECT_NAME=project4
|
CCIS_PROJECT_NAME=project4
|
||||||
CCIS_SERVER=login.ccs.neu.edu
|
CCIS_SERVER=login.ccs.neu.edu
|
||||||
|
@ -25,7 +25,7 @@ endif
|
||||||
./webcrawler -d $(FB_USERNAME) $(FB_PASSWORD)
|
./webcrawler -d $(FB_USERNAME) $(FB_PASSWORD)
|
||||||
|
|
||||||
.setup:
|
.setup:
|
||||||
-raco pkg install smol-http/
|
raco pkg install smol-http/
|
||||||
@touch $@
|
@touch $@
|
||||||
|
|
||||||
upload: $(SOURCES)
|
upload: $(SOURCES)
|
||||||
|
@ -34,8 +34,3 @@ upload: $(SOURCES)
|
||||||
|
|
||||||
submit: upload
|
submit: upload
|
||||||
ssh $(CCIS_SERVER) -t -- "cd $(CCIS_PATH); make clean; $(CCIS_TURNIN) $(CCIS_PROJECT_NAME) ."
|
ssh $(CCIS_SERVER) -t -- "cd $(CCIS_PATH); make clean; $(CCIS_TURNIN) $(CCIS_PROJECT_NAME) ."
|
||||||
|
|
||||||
clean:
|
|
||||||
$(RM) .setup
|
|
||||||
find . \( -iname '*.zo' -o -iname '*.dep' \) -delete
|
|
||||||
find . -type d -iname compiled -delete
|
|
||||||
|
|
|
@ -3,8 +3,3 @@ fe661b328cdd23a3d8ddd84b7b4254c525860328dd40046dd8677f91dca6341d
|
||||||
8eccc00d948fc733c345c730ac86b8194aaa0376b5438b983ad00e1dd0fab73f
|
8eccc00d948fc733c345c730ac86b8194aaa0376b5438b983ad00e1dd0fab73f
|
||||||
388de86a629eb89093b27e9a92a29fd4e45c3aced219be1059238fd471849700
|
388de86a629eb89093b27e9a92a29fd4e45c3aced219be1059238fd471849700
|
||||||
38c7161b87ae13bc2c0a1499cf0ec4d0f6eace9f12cfb9042b26a969fd20dc48
|
38c7161b87ae13bc2c0a1499cf0ec4d0f6eace9f12cfb9042b26a969fd20dc48
|
||||||
9b4410bffa98dc2c3f5f466bdb9bce63cec1d6d498a55d91d873cab3544cc0f4
|
|
||||||
0fc45f2341dc966de6e350db2d0cc3070db99a5c66b06b0f8272b35bcfbf525d
|
|
||||||
7926b87e2a1ac49700f0bccdec585fe0874f009a8ef5c5d0fe9ef7135fcf66eb
|
|
||||||
d1584bcc2e42017db2c7146ab721d9172f2ae03d4ac42d8b6d6561125c7a212f
|
|
||||||
c5fbc705700769d6c6bfc182f9e8cc20b340f92d4be4ffda00eb311375973542
|
|
||||||
|
|
|
@ -14,6 +14,7 @@
|
||||||
|
|
||||||
http-body?
|
http-body?
|
||||||
empty-http-body
|
empty-http-body
|
||||||
|
form-alist->http-body
|
||||||
write-http-body
|
write-http-body
|
||||||
force-http-body
|
force-http-body
|
||||||
in-http-body-chunks)
|
in-http-body-chunks)
|
||||||
|
@ -21,6 +22,8 @@
|
||||||
(require racket/match
|
(require racket/match
|
||||||
racket/port
|
racket/port
|
||||||
racket/stream
|
racket/stream
|
||||||
|
net/uri-codec
|
||||||
|
|
||||||
"./util.rkt")
|
"./util.rkt")
|
||||||
|
|
||||||
(module+ test
|
(module+ test
|
||||||
|
@ -154,6 +157,11 @@
|
||||||
|
|
||||||
(define empty-http-body #f)
|
(define empty-http-body #f)
|
||||||
|
|
||||||
|
;; (form-alist->http-body al) -> http-body?
|
||||||
|
;; al : (listof (cons/c symbol? string?))
|
||||||
|
(define (form-alist->http-body al)
|
||||||
|
(string->bytes/utf-8 (alist->form-urlencoded al)))
|
||||||
|
|
||||||
;; (write-http-body bdy [port]) -> void?
|
;; (write-http-body bdy [port]) -> void?
|
||||||
;; bdy : http-body?
|
;; bdy : http-body?
|
||||||
;; port : output-port?
|
;; port : output-port?
|
||||||
|
|
14
webcrawler
14
webcrawler
|
@ -86,12 +86,16 @@
|
||||||
|
|
||||||
;; Logs in with the given username and password
|
;; Logs in with the given username and password
|
||||||
(define (crawler-login username password)
|
(define (crawler-login username password)
|
||||||
|
(void (crawler-fetch 'POST LOGIN-PATH
|
||||||
|
(form-alist->http-body
|
||||||
|
`([username . ,username]
|
||||||
|
[password . ,password]
|
||||||
|
[csrfmiddlewaretoken . ,(crawler-get-csrf-token!)])))))
|
||||||
|
|
||||||
|
;; -> string?
|
||||||
|
(define (crawler-get-csrf-token!)
|
||||||
(crawler-fetch 'GET LOGIN-PATH)
|
(crawler-fetch 'GET LOGIN-PATH)
|
||||||
(define form-body
|
(cookie-jar-ref (current-cookie-jar) "csrftoken"))
|
||||||
(format "username=~a&password=~a&csrfmiddlewaretoken=~a&next="
|
|
||||||
username password (cookie-jar-ref (current-cookie-jar) "csrftoken")))
|
|
||||||
(crawler-fetch 'POST LOGIN-PATH (string->bytes/utf-8 form-body))
|
|
||||||
(void))
|
|
||||||
|
|
||||||
;; Checks if this is a URL we should crawl
|
;; Checks if this is a URL we should crawl
|
||||||
(define (crawler-valid-url? page-url)
|
(define (crawler-valid-url? page-url)
|
||||||
|
|
Loading…
Reference in New Issue