fix issues with exn:break handling
This commit is contained in:
parent
d4482794ab
commit
0ae728a278
|
@ -16,52 +16,60 @@
|
|||
;; You should have received a copy of the GNU Affero General Public License
|
||||
;; along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
(require racket/contract racket/fasl racket/file racket/match racket/port racket/string racket/unit
|
||||
(require racket/async-channel racket/bool racket/contract racket/fasl racket/file racket/function
|
||||
racket/match racket/port racket/string racket/unit
|
||||
"comms.rkt" "info.rkt" "logging.rkt" "not-crypto.rkt" "manifest.rkt" "protocol.rkt"
|
||||
"static-support.rkt")
|
||||
|
||||
(define-logger agent #:parent global-logger)
|
||||
|
||||
(define (get-config.linux-gnu)
|
||||
(call-with-input-file "/proc/self/exe"
|
||||
(lambda (in)
|
||||
(file-position in eof)
|
||||
(define len (file-position in))
|
||||
(file-position in (- len 4))
|
||||
(define offset (integer-bytes->integer (port->bytes in) #f #t))
|
||||
(file-position in (- len offset))
|
||||
(fasl->s-exp in))))
|
||||
;; global variables, yeet
|
||||
|
||||
(struct assignment [id task-id manifest file-hash work-range] #:transparent)
|
||||
(define incoming-queue (make-async-channel))
|
||||
|
||||
|
||||
;; main loop
|
||||
(define (agent-loop)
|
||||
; (thread (lambda ()
|
||||
; (log-agent-info "downloading assignment ~a" aid)
|
||||
; (define data (get-project-file tid))
|
||||
; (log-agent-info "assignment data: ~s" data)
|
||||
; (log-agent-info "simulating assignment ~a" aid)
|
||||
; (sleep 10)
|
||||
; (log-agent-info "sending completion ~a" aid)
|
||||
; (agent-report-state aid 'complete)))
|
||||
(sleep 10)
|
||||
(agent-loop))
|
||||
|
||||
|
||||
;; rpc impl
|
||||
|
||||
(define (enforce-subject type)
|
||||
(unless (symbol=? type (node-type (current-from-node)))
|
||||
(error "unauthorized")))
|
||||
|
||||
(define/contract (push-assignment aid tid mf-raw file-hash assign-data)
|
||||
(-> integer? integer? list? bytes? (listof pair?) void?)
|
||||
(enforce-subject 'server)
|
||||
(log-agent-info "got push-assignment ~a ~a ~s ~a" aid mf-raw file-hash assign-data)
|
||||
(thread (lambda ()
|
||||
(log-agent-info "downloading assignment ~a" aid)
|
||||
(define data (get-project-file tid))
|
||||
(log-agent-info "assignment data: ~s" data)
|
||||
(log-agent-info "simulating assignment ~a" aid)
|
||||
(sleep 10)
|
||||
(log-agent-info "sending completion ~a" aid)
|
||||
(agent-report-state aid 'complete)))
|
||||
|
||||
(async-channel-put
|
||||
incoming-queue (cons 'new (assignment aid tid (parse-manifest mf-raw) file-hash assign-data)))
|
||||
(void))
|
||||
|
||||
(define/contract (cancel-assignment aid)
|
||||
(-> integer? void?)
|
||||
(enforce-subject 'server)
|
||||
(log-agent-info "got cancel-assignment ~a" aid)
|
||||
(async-channel-put incoming-queue (cons 'cancel aid))
|
||||
(void))
|
||||
|
||||
(define/contract (cancel-all-assignments)
|
||||
(-> void?)
|
||||
(enforce-subject 'server)
|
||||
(log-agent-info "got cancel-all-assignments")
|
||||
(async-channel-put incoming-queue 'cancel-all)
|
||||
(void))
|
||||
|
||||
;; agent impl unit
|
||||
|
@ -77,6 +85,16 @@
|
|||
(install-logging!)
|
||||
(log-agent-info "starting crossfire-agent v~a" (#%info-lookup 'version))
|
||||
|
||||
(define (get-config.linux-gnu)
|
||||
(call-with-input-file "/proc/self/exe"
|
||||
(lambda (in)
|
||||
(file-position in eof)
|
||||
(define len (file-position in))
|
||||
(file-position in (- len 4))
|
||||
(define offset (integer-bytes->integer (port->bytes in) #f #t))
|
||||
(file-position in (- len offset))
|
||||
(fasl->s-exp in))))
|
||||
|
||||
(match-define (list agent-node server-node)
|
||||
(if (static-ffi-available?)
|
||||
(match (string-split (static-ffi-arch) "-")
|
||||
|
@ -98,13 +116,20 @@
|
|||
(comms-set-node-info (current-comms) server-node)
|
||||
|
||||
(log-agent-info "connecting to server...")
|
||||
(let loop ([sleep-time 1])
|
||||
(with-handlers ([exn? (lambda (ex)
|
||||
(log-agent-error "error connecting to server: ~a" ex)
|
||||
(sleep sleep-time)
|
||||
(loop (min 120 (* sleep-time 2))))])
|
||||
(comms-connect (current-comms) (node-id server-node))
|
||||
(agent-report-state #f #f)))
|
||||
(log-agent-info "connected! ready to do stuff")
|
||||
(with-handlers ([exn:break? (lambda (_)
|
||||
(log-agent-info "connection cancelled")
|
||||
(exit))])
|
||||
(let loop ([sleep-time 1])
|
||||
(define maybe-exn
|
||||
(with-handlers ([exn:fail? identity])
|
||||
(comms-connect (current-comms) (node-id server-node))
|
||||
(agent-report-state #f #f)
|
||||
#f))
|
||||
(when maybe-exn
|
||||
(log-agent-error "error connecting to server")
|
||||
((error-display-handler) (exn-message maybe-exn) maybe-exn)
|
||||
(sleep sleep-time)
|
||||
(loop (min 120 (* sleep-time 2))))))
|
||||
|
||||
(log-agent-info "connected! ready to do stuff")
|
||||
(agent-loop))
|
||||
|
|
|
@ -112,7 +112,7 @@
|
|||
|
||||
;; handles tcp data in
|
||||
(define (handle-in-msg)
|
||||
(match (with-handlers ([exn? (lambda (_) #f)]) (fasl->s-exp in))
|
||||
(match (with-handlers ([exn:fail? (lambda (_) #f)]) (fasl->s-exp in))
|
||||
[(locked fasl nonce mac)
|
||||
(match (crypto-unlock session-key nonce mac fasl)
|
||||
[#f (error "corrupted message from peer" (node-id peer-data))]
|
||||
|
@ -179,7 +179,7 @@
|
|||
(thread-wait new-thd)
|
||||
(custodian-shutdown-all cust)
|
||||
(crypto-wipe session-key)
|
||||
(with-handlers ([exn? void])
|
||||
(with-handlers ([exn:fail? void])
|
||||
(thread-sendrecv el-thread 'deregister-channel (node-id peer-data)))))
|
||||
new-thd)
|
||||
|
||||
|
@ -200,7 +200,7 @@
|
|||
|
||||
;; starts the tcp listener
|
||||
(define (handle-listen from port)
|
||||
(with-handlers ([exn? (lambda (ex) (thread-send from ex #f))])
|
||||
(with-handlers ([exn:fail? (lambda (ex) (thread-send from ex #f))])
|
||||
(when (false? listener-thd)
|
||||
(set! listener (tcp-listen port 4 #t))
|
||||
(log-comms-info "listening on port ~a" port)
|
||||
|
@ -219,7 +219,7 @@
|
|||
(define (handle-connect from id)
|
||||
(thread
|
||||
(lambda ()
|
||||
(with-handlers ([exn? (lambda (ex) (thread-send from ex #f))])
|
||||
(with-handlers ([exn:fail? (lambda (ex) (thread-send from ex #f))])
|
||||
(match (hash-ref node-registry id #f)
|
||||
[#f (thread-send from (make-error "no such node" id) #f)]
|
||||
[(node id name type pubkey seckey host port)
|
||||
|
@ -350,10 +350,14 @@
|
|||
(raise ex))
|
||||
(sleep retry-delay)
|
||||
(comms-dispatch-msg/retry comms to-id msg (sub1 tries) retry-delay))
|
||||
(with-handlers ([exn? do-retry])
|
||||
(unless (comms-channel-available? comms to-id)
|
||||
(comms-connect comms to-id))
|
||||
(comms-dispatch-msg comms to-id msg)))
|
||||
(define maybe-exn
|
||||
(with-handlers ([exn:fail? identity])
|
||||
(unless (comms-channel-available? comms to-id)
|
||||
(comms-connect comms to-id))
|
||||
(comms-dispatch-msg comms to-id msg)
|
||||
#f))
|
||||
(when maybe-exn
|
||||
(do-retry maybe-exn)))
|
||||
|
||||
(provide make-comms comms-listen comms-connect comms-get-node-info comms-set-node-info
|
||||
comms-delete-node comms-channel-available? comms-shutdown)
|
||||
|
@ -404,7 +408,7 @@
|
|||
(define (recv-transaction from key to-id transaction)
|
||||
(define (cleanup)
|
||||
(thread-sendrecv tm-thread 'deregister-response key))
|
||||
(with-handlers ([exn? (lambda (ex)
|
||||
(with-handlers ([exn:fail? (lambda (ex)
|
||||
(cleanup) (thread-send from ex #f))])
|
||||
(thread-receive) ;; go token
|
||||
(comms-dispatch-msg/retry comms to-id transaction)
|
||||
|
@ -431,19 +435,21 @@
|
|||
(define (respond data)
|
||||
(define resp
|
||||
(msg:transaction (node-id my-node) trans-id #f rpc-id (trans-data-serialize data)))
|
||||
(with-handlers ([exn?
|
||||
(with-handlers ([exn:fail?
|
||||
(lambda (ex)
|
||||
((error-display-handler) "failed to dispatch transaction response" ex))])
|
||||
(comms-dispatch-msg/retry comms from-id resp)))
|
||||
|
||||
(with-handlers ([exn? respond])
|
||||
(define arg-data (trans-data-deserialize rpc-data))
|
||||
(define result
|
||||
(parameterize ([current-from-node (comms-get-node-info comms from-id)])
|
||||
;; TODO : apply timeout on the handler function?
|
||||
;; we don't want this thread to potentially hang forever if there's some sort of deadlock
|
||||
(apply func arg-data)))
|
||||
(respond result)))
|
||||
(respond
|
||||
(with-handlers ([exn:fail? identity])
|
||||
(define arg-data (trans-data-deserialize rpc-data))
|
||||
(define result
|
||||
(parameterize ([current-from-node (comms-get-node-info comms from-id)])
|
||||
;; TODO : apply timeout on the handler function?
|
||||
;; we don't want this thread to potentially hang forever if there's some sort of
|
||||
;; deadlock
|
||||
(apply func arg-data)))
|
||||
result)))
|
||||
|
||||
(define (handle-thread-msg)
|
||||
(match-define (cons from (cons type data)) (thread-receive))
|
||||
|
|
|
@ -18,8 +18,8 @@
|
|||
|
||||
(require db/base db/sqlite3
|
||||
data/queue racket/async-channel racket/bool racket/contract racket/fasl racket/file
|
||||
racket/list racket/logging racket/match racket/path racket/random racket/runtime-path
|
||||
racket/set racket/string racket/unit srfi/19
|
||||
racket/function racket/list racket/logging racket/match racket/path racket/random
|
||||
racket/runtime-path racket/set racket/string racket/unit srfi/19
|
||||
north/base north/adapter/base north/adapter/sqlite
|
||||
"comms.rkt" "info.rkt" "logging.rkt" "manifest.rkt" "not-crypto.rkt" "pattern.rkt" "protocol.rkt"
|
||||
;; port-fsync
|
||||
|
@ -332,7 +332,7 @@
|
|||
(define/contract (get-project-file taskid)
|
||||
(-> integer? bytes?)
|
||||
;; TODO : streaming interface
|
||||
(with-handlers ([exn? (lambda (ex) (error "unable to fetch the requested file"))])
|
||||
(with-handlers ([exn:fail? (lambda (ex) (error "unable to fetch the requested file"))])
|
||||
(server-get-file taskid)))
|
||||
|
||||
|
||||
|
@ -505,12 +505,16 @@
|
|||
;; helper to repeatedly invoke an agent rpc
|
||||
(define (invoke/retry-forever proc)
|
||||
(let init-loop ([retry-delay *min-retry-delay*])
|
||||
(with-handlers ([exn? (lambda (ex)
|
||||
(log-server-error "agent ~a encountered error ~a" id ex)
|
||||
(sleep retry-delay)
|
||||
(init-loop (min *max-retry-delay*
|
||||
(* *retry-delay-ratio* retry-delay))))])
|
||||
(proc))))
|
||||
(define maybe-exn
|
||||
(with-handlers ([exn:fail? identity])
|
||||
(proc)
|
||||
#f))
|
||||
(when maybe-exn
|
||||
(log-server-error "agent ~a encountered error" id)
|
||||
((error-display-handler) (exn-message maybe-exn) maybe-exn)
|
||||
(sleep retry-delay)
|
||||
(init-loop (min *max-retry-delay*
|
||||
(* *retry-delay-ratio* retry-delay))))))
|
||||
|
||||
;; #t if a new assignment was added, otherwise #f
|
||||
(define (create-assignment! ts)
|
||||
|
|
|
@ -39,7 +39,7 @@
|
|||
|
||||
;; checks if the current runtime provides a static ffi table
|
||||
(define (static-ffi-available?)
|
||||
(with-handlers ([exn? (lambda (ex) #f)])
|
||||
(with-handlers ([exn:fail? (lambda (ex) #f)])
|
||||
(dynamic-require ''#%static-ffi 'table)
|
||||
#t))
|
||||
|
||||
|
|
Loading…
Reference in New Issue