CS3700-project4/private/util.rkt

122 lines
3.8 KiB
Racket
Raw Permalink Normal View History

2020-04-11 04:32:34 +00:00
#lang racket
(require xml)
(provide debug-mode? printf-debug print-progress print-complete print-flag
current-cookie-jar update-cookie-jar! cookie-jar->header cookie-jar-ref
string->xexpr find-flags find-hrefs)
2020-04-11 04:32:34 +00:00
;; scheme is bad and it should feel bad
(define member? member)
;; Debug prints
;; Parameter for debug mode
(define debug-mode? (make-parameter #f))
;; ... ->
;; Prints the arguments as with printf, but only when debug mode is on
(define (printf-debug . args)
(when (debug-mode?) (apply printf args)))
;; Int Int Int Int ->
;; Prints progress to the console, only when debug mode is on
(define (print-progress total-pages in-flight crawled-pages num-flags)
(when (debug-mode?)
(printf "\r\x1b[KStatus: ~a/~a (~a in flight) | Flags: ~a"
crawled-pages total-pages in-flight num-flags)
(flush-output (current-output-port))))
;; ->
;; Prints a completion message to the console, only when debug mode is on
2020-04-11 08:22:05 +00:00
(define (print-complete total-pages num-flags)
2020-04-11 04:32:34 +00:00
(when (debug-mode?)
2020-04-11 08:22:05 +00:00
(printf "\r\x1b[KCrawl complete: ~a pages crawled, ~a flags found\n"
total-pages num-flags)))
2020-04-11 04:32:34 +00:00
;; Str ->
;; Prints a flag
(define (print-flag flag)
(if (debug-mode?)
2020-04-11 08:02:33 +00:00
(printf "\r\x1b[K~a\n" flag) ; make sure not to garble the debug progress
(displayln flag)))
2020-04-11 04:32:34 +00:00
;; Cookie jar
(struct cj [jar cache sema] #:transparent)
;; -> CookieJar
;; Creates empty cookie jar
;; Since Racket threads are preemptive (gross) we include a semaphore to guard against concurrent
;; modification
2020-04-11 04:32:34 +00:00
(define (make-cookie-jar)
(cj (make-hash) (box (cons 'cookie "")) (make-semaphore 1)))
;; Parameter for current cookie jar
(define current-cookie-jar (make-parameter (make-cookie-jar)))
;; CookieJar -> String
;; Gets a value from a cookie jar
(define (cookie-jar-ref jar key)
(call-with-semaphore
(cj-sema jar)
(lambda () (hash-ref (cj-jar jar) key))))
2020-04-11 04:32:34 +00:00
;; CookieJar [List-of (cons Symbol String)] ->
;; For a response with the given headers, finds any set-cookie headers and updates the jar
(define (update-cookie-jar! jar hdrs)
(call-with-semaphore
(cj-sema jar)
(lambda ()
(for ([hdr (in-list hdrs)])
(when (symbol=? 'set-cookie (car hdr))
(define kv (string-trim (first (string-split (cdr hdr) ";"))))
(match-define (list k v) (string-split kv "="))
(hash-set! (cj-jar jar) k v)))
(set-box!
(cj-cache jar)
(cons 'cookie
(string-join
(for/list ([(k v) (in-hash (cj-jar jar))])
2020-04-11 04:32:34 +00:00
(format "~a=~a" k v)) "; "))))))
;; CookieJar -> (cons Symbol String)
;; Converts this cookie jar into a cookie header for requests
(define (cookie-jar->header jar)
(call-with-semaphore (cj-sema jar) (lambda () (unbox (cj-cache jar)))))
2020-04-11 04:32:34 +00:00
;; HTML (XML) handling
;; String -> xexpr
;; Parses the given string as xml
(define (string->xexpr str)
(xml->xexpr (document-element (read-xml (open-input-string str)))))
;; xexpr -> [List-of String]
;; Finds all flags in the given xexpr
(define (find-flags xexpr)
(match xexpr
[(list _ (? (curry member? '(class "secret_flag"))) str)
2020-04-11 08:02:33 +00:00
(match (string-split (string-trim str) " ")
;; Match the exact flag format
2020-04-11 08:02:33 +00:00
[(list "FLAG:" (? (compose (curry = 64) string-length) flag))
(list flag)]
[_ '()])]
2020-04-11 04:32:34 +00:00
[(list tag params rst ...)
(foldl (lambda (x r) (append r (find-flags x))) '() rst)]
[_ '()]))
;; xexpr -> [List-of String]
;; Finds all <a> tag hrefs in the given xexpr
(define (find-hrefs xexpr)
(define (sub-find rst)
(foldl (lambda (x r) (append r (find-hrefs x))) '() rst))
(match xexpr
[(list 'a params rst ...)
(append
(for/list ([param (in-list params)]
#:when (symbol=? 'href (first param)))
(second param))
(sub-find rst))]
[(list tag params rst ...)
(sub-find rst)]
[_ '()]))