Avoid use of lexer, preferring regexp-try-match and explicit read-byte/read-char.

This commit is contained in:
Tony Garnock-Jones 2014-12-18 14:25:56 -05:00
parent 78553af55c
commit 083a66dcdf
1 changed files with 68 additions and 61 deletions

View File

@ -5,12 +5,12 @@
(struct-out unknown-escape-sequence)
lex-lcd-input)
(require parser-tools/lex)
(require (prefix-in : parser-tools/lex-sre))
(require racket/set)
(require racket/match)
(require (only-in racket/string string-split))
(define lcd-terminal-utf-8? (make-parameter #t))
(struct unknown-escape-sequence (string) #:prefab)
(struct key (value modifiers) #:prefab)
@ -24,12 +24,6 @@
(define (add-modifier modifier k)
(struct-copy key k [modifiers (set-add (key-modifiers k) modifier)]))
(define (char+ c offset)
(integer->char (+ (char->integer c) offset)))
(define (control-character lexeme offset ctor)
(ctor (char+ (string-ref lexeme 0) (+ (char->integer #\A) -1 offset))))
(define (decode-shifting-number v-plus-one k)
(define v (- v-plus-one 1))
(let* ((k (if (zero? (bitwise-and v 1)) k (add-modifier 'shift k)))
@ -62,10 +56,7 @@
[8 (ctor 'end)]
[_ (simple-key (unknown-escape-sequence lexeme))])))
(define (analyze-vt-bracket-key lexeme)
(match lexeme
[(pregexp "\e\\[([0-9]+(;[0-9]+)*)?(.)" (list _ paramstr _ mainchar))
(define params (and paramstr (map string->number (string-split paramstr ";"))))
(define (analyze-vt-bracket-key lexeme params mainchar)
(match mainchar
["~" (analyze-vt-tildeish-key lexeme params simple-key)]
["$" (analyze-vt-tildeish-key lexeme params S-)]
@ -92,7 +83,6 @@
["c" (S- 'right)]
["d" (S- 'left)]
["h" #:when (equal? params '(4)) (simple-key 'insert)] ;; st, http://st.suckless.org/
[_ (simple-key (unknown-escape-sequence lexeme))])]
[_ (simple-key (unknown-escape-sequence lexeme))]))
(define (analyze-vt-O-mainchar lexeme mainchar)
@ -132,26 +122,43 @@
["M" (add-modifier 'control (simple-key #\M))] ;; keypad enter (rxvt)
[_ (simple-key (unknown-escape-sequence lexeme))]))
(define (analyze-vt-O-key lexeme)
(match lexeme
[(pregexp "\eO(.)(.)" (list _ v-plus-one-str mainchar))
;; screen generates shifting escapes for the keypad like this
(decode-shifting-number (string->number v-plus-one-str)
(analyze-vt-O-mainchar lexeme mainchar))]
[(pregexp "\eO(.)" (list _ mainchar))
(analyze-vt-O-mainchar lexeme mainchar)]
[other (simple-key (unknown-escape-sequence lexeme))]))
(define (interpret-ascii-code b)
(cond
[(<= #x00 b #x1f) (C- (integer->char (+ b (char->integer #\A) -1)))]
[(<= #x20 b #x7e) (simple-key (integer->char b))]
[(= b #x7f) (simple-key 'backspace)]))
(define lex-lcd-input
(lexer [(eof) eof]
[(char-range #\u00 #\u1f) (control-character lexeme 0 C-)]
[(char-range #\u20 #\u7e) (simple-key (string-ref lexeme 0))]
[#\u7f (simple-key 'backspace)]
[(char-range #\u80 #\u9f) (control-character lexeme -128 C-M-)]
[(char-range #\u80 #\ufe) (M- (char+ (string-ref lexeme 0) -128))]
[#\uff (M- 'backspace)]
[(:: "\e[" (:? (:+ numeric) (:* #\; (:+ numeric))) any-char)
(analyze-vt-bracket-key lexeme)]
[(:: "\eO" any-char) (analyze-vt-O-key lexeme)]
[(:: "\eO" numeric any-char) (analyze-vt-O-key lexeme)]
))
(define (lex-lcd-input port)
(cond
[(eof-object? (peek-byte port)) eof]
[(or (regexp-try-match #px"^\e\\[([0-9]+(;[0-9]+)*)?(.)" port)
(regexp-try-match #px#"^\x9b([0-9]+(;[0-9]+)*)?(.)" port)) =>
(lambda (match-result)
(match-define (list lexeme parambytes _ mainbytes) match-result)
(define params (and parambytes
(map string->number (string-split (bytes->string/utf-8 parambytes) ";"))))
(analyze-vt-bracket-key lexeme params (bytes->string/utf-8 mainbytes)))]
[(regexp-try-match #px"^\eO([0-9])(.)" port) =>
;; screen generates shifting escapes for the keypad like this
(lambda (match-result)
(match-define (list lexeme v-plus-one-bytes mainbytes) match-result)
(decode-shifting-number (string->number (bytes->string/utf-8 v-plus-one-bytes))
(analyze-vt-O-mainchar lexeme (bytes->string/utf-8 mainbytes))))]
[(regexp-try-match #px"^\eO(.)" port) =>
(lambda (match-result)
(match-define (list lexeme mainbytes) match-result)
(analyze-vt-O-mainchar lexeme (bytes->string/utf-8 mainbytes)))]
;; Characters between #\u80 and #\uff are ambiguous because in
;; some terminals, the high bit is set to indicate meta, and in
;; others, they are plain UTF-8 characters. We let the user
;; distinguish via the lcd-terminal-utf-8? parameter.
[(not (lcd-terminal-utf-8?))
(define b (read-byte port))
(if (< b 128)
(interpret-ascii-code b)
(add-modifier 'meta (interpret-ascii-code (- b 128))))]
[else
(define b (char->integer (read-char port)))
(if (< b 128)
(interpret-ascii-code b)
(simple-key (integer->char b)))]))