From 083a66dcdf59fb425963df43f9d40ce247f175c0 Mon Sep 17 00:00:00 2001
From: Tony Garnock-Jones <tonygarnockjones@gmail.com>
Date: Thu, 18 Dec 2014 14:25:56 -0500
Subject: [PATCH] Avoid use of lexer, preferring regexp-try-match and explicit
 read-byte/read-char.

---
 ansi/lcd-terminal.rkt | 129 ++++++++++++++++++++++--------------------
 1 file changed, 68 insertions(+), 61 deletions(-)

diff --git a/ansi/lcd-terminal.rkt b/ansi/lcd-terminal.rkt
index a8891ee..87c0eb7 100644
--- a/ansi/lcd-terminal.rkt
+++ b/ansi/lcd-terminal.rkt
@@ -5,12 +5,12 @@
          (struct-out unknown-escape-sequence)
          lex-lcd-input)
 
-(require parser-tools/lex)
-(require (prefix-in : parser-tools/lex-sre))
 (require racket/set)
 (require racket/match)
 (require (only-in racket/string string-split))
 
+(define lcd-terminal-utf-8? (make-parameter #t))
+
 (struct unknown-escape-sequence (string) #:prefab)
 (struct key (value modifiers) #:prefab)
 
@@ -24,12 +24,6 @@
 (define (add-modifier modifier k)
   (struct-copy key k [modifiers (set-add (key-modifiers k) modifier)]))
 
-(define (char+ c offset)
-  (integer->char (+ (char->integer c) offset)))
-
-(define (control-character lexeme offset ctor)
-  (ctor (char+ (string-ref lexeme 0) (+ (char->integer #\A) -1 offset))))
-
 (define (decode-shifting-number v-plus-one k)
   (define v (- v-plus-one 1))
   (let* ((k (if (zero? (bitwise-and v 1)) k (add-modifier 'shift k)))
@@ -62,37 +56,33 @@
                             [8 (ctor 'end)]
                             [_ (simple-key (unknown-escape-sequence lexeme))])))
 
-(define (analyze-vt-bracket-key lexeme)
-  (match lexeme
-    [(pregexp "\e\\[([0-9]+(;[0-9]+)*)?(.)" (list _ paramstr _ mainchar))
-     (define params (and paramstr (map string->number (string-split paramstr ";"))))
-     (match mainchar
-       ["~" (analyze-vt-tildeish-key lexeme params simple-key)]
-       ["$" (analyze-vt-tildeish-key lexeme params S-)]
-       ["^" (analyze-vt-tildeish-key lexeme params C-)]
-       ["@" (analyze-vt-tildeish-key lexeme params C-S-)]
-       ["A" (decode-shifting params 'up)]
-       ["B" (decode-shifting params 'down)]
-       ["C" (decode-shifting params 'right)]
-       ["D" (decode-shifting params 'left)]
-       ["E" (decode-shifting params 'begin)]
-       ["F" (decode-shifting params 'end)]
-       ["G" (decode-shifting params 'begin)] ;; linux console (!)
-       ["H" (decode-shifting params 'home)]
-       ["J" #:when (equal? params '(2)) (S- 'home)] ;; st, http://st.suckless.org/
-       ["J" #:when (not params) (C- 'end)] ;; st, http://st.suckless.org/
-       ["K" #:when (equal? params '(2)) (S- 'delete)] ;; st, http://st.suckless.org/
-       ["K" #:when (not params) (S- 'end)] ;; st, http://st.suckless.org/
-       ["L" (C- 'insert)] ;; st, http://st.suckless.org/
-       ["M" (C- 'delete)] ;; st, http://st.suckless.org/
-       ["P" (simple-key 'delete)] ;; st, http://st.suckless.org/
-       ["Z" (C-S- #\I)] ;; TODO: should this instead be a 'backtab key?
-       ["a" (S- 'up)]
-       ["b" (S- 'down)]
-       ["c" (S- 'right)]
-       ["d" (S- 'left)]
-       ["h" #:when (equal? params '(4)) (simple-key 'insert)] ;; st, http://st.suckless.org/
-       [_ (simple-key (unknown-escape-sequence lexeme))])]
+(define (analyze-vt-bracket-key lexeme params mainchar)
+  (match mainchar
+    ["~" (analyze-vt-tildeish-key lexeme params simple-key)]
+    ["$" (analyze-vt-tildeish-key lexeme params S-)]
+    ["^" (analyze-vt-tildeish-key lexeme params C-)]
+    ["@" (analyze-vt-tildeish-key lexeme params C-S-)]
+    ["A" (decode-shifting params 'up)]
+    ["B" (decode-shifting params 'down)]
+    ["C" (decode-shifting params 'right)]
+    ["D" (decode-shifting params 'left)]
+    ["E" (decode-shifting params 'begin)]
+    ["F" (decode-shifting params 'end)]
+    ["G" (decode-shifting params 'begin)] ;; linux console (!)
+    ["H" (decode-shifting params 'home)]
+    ["J" #:when (equal? params '(2)) (S- 'home)] ;; st, http://st.suckless.org/
+    ["J" #:when (not params) (C- 'end)] ;; st, http://st.suckless.org/
+    ["K" #:when (equal? params '(2)) (S- 'delete)] ;; st, http://st.suckless.org/
+    ["K" #:when (not params) (S- 'end)] ;; st, http://st.suckless.org/
+    ["L" (C- 'insert)]                  ;; st, http://st.suckless.org/
+    ["M" (C- 'delete)]                  ;; st, http://st.suckless.org/
+    ["P" (simple-key 'delete)]          ;; st, http://st.suckless.org/
+    ["Z" (C-S- #\I)] ;; TODO: should this instead be a 'backtab key?
+    ["a" (S- 'up)]
+    ["b" (S- 'down)]
+    ["c" (S- 'right)]
+    ["d" (S- 'left)]
+    ["h" #:when (equal? params '(4)) (simple-key 'insert)] ;; st, http://st.suckless.org/
     [_ (simple-key (unknown-escape-sequence lexeme))]))
 
 (define (analyze-vt-O-mainchar lexeme mainchar)
@@ -132,26 +122,43 @@
     ["M" (add-modifier 'control (simple-key #\M))] ;; keypad enter (rxvt)
     [_ (simple-key (unknown-escape-sequence lexeme))]))
 
-(define (analyze-vt-O-key lexeme)
-  (match lexeme
-    [(pregexp "\eO(.)(.)" (list _ v-plus-one-str mainchar))
-     ;; screen generates shifting escapes for the keypad like this
-     (decode-shifting-number (string->number v-plus-one-str)
-                             (analyze-vt-O-mainchar lexeme mainchar))]
-    [(pregexp "\eO(.)" (list _ mainchar))
-     (analyze-vt-O-mainchar lexeme mainchar)]
-    [other (simple-key (unknown-escape-sequence lexeme))]))
+(define (interpret-ascii-code b)
+  (cond
+   [(<= #x00 b #x1f) (C- (integer->char (+ b (char->integer #\A) -1)))]
+   [(<= #x20 b #x7e) (simple-key (integer->char b))]
+   [(= b #x7f) (simple-key 'backspace)]))
 
-(define lex-lcd-input
-  (lexer [(eof) eof]
-         [(char-range #\u00 #\u1f) (control-character lexeme 0 C-)]
-         [(char-range #\u20 #\u7e) (simple-key (string-ref lexeme 0))]
-         [#\u7f (simple-key 'backspace)]
-         [(char-range #\u80 #\u9f) (control-character lexeme -128 C-M-)]
-         [(char-range #\u80 #\ufe) (M- (char+ (string-ref lexeme 0) -128))]
-         [#\uff (M- 'backspace)]
-         [(:: "\e[" (:? (:+ numeric) (:* #\; (:+ numeric))) any-char)
-          (analyze-vt-bracket-key lexeme)]
-         [(:: "\eO" any-char) (analyze-vt-O-key lexeme)]
-         [(:: "\eO" numeric any-char) (analyze-vt-O-key lexeme)]
-         ))
+(define (lex-lcd-input port)
+  (cond
+   [(eof-object? (peek-byte port)) eof]
+   [(or (regexp-try-match #px"^\e\\[([0-9]+(;[0-9]+)*)?(.)" port)
+        (regexp-try-match #px#"^\x9b([0-9]+(;[0-9]+)*)?(.)" port)) =>
+    (lambda (match-result)
+      (match-define (list lexeme parambytes _ mainbytes) match-result)
+      (define params (and parambytes
+                          (map string->number (string-split (bytes->string/utf-8 parambytes) ";"))))
+      (analyze-vt-bracket-key lexeme params (bytes->string/utf-8 mainbytes)))]
+   [(regexp-try-match #px"^\eO([0-9])(.)" port) =>
+    ;; screen generates shifting escapes for the keypad like this
+    (lambda (match-result)
+      (match-define (list lexeme v-plus-one-bytes mainbytes) match-result)
+      (decode-shifting-number (string->number (bytes->string/utf-8 v-plus-one-bytes))
+                              (analyze-vt-O-mainchar lexeme (bytes->string/utf-8 mainbytes))))]
+   [(regexp-try-match #px"^\eO(.)" port) =>
+    (lambda (match-result)
+      (match-define (list lexeme mainbytes) match-result)
+      (analyze-vt-O-mainchar lexeme (bytes->string/utf-8 mainbytes)))]
+   ;; Characters between #\u80 and #\uff are ambiguous because in
+   ;; some terminals, the high bit is set to indicate meta, and in
+   ;; others, they are plain UTF-8 characters. We let the user
+   ;; distinguish via the lcd-terminal-utf-8? parameter.
+   [(not (lcd-terminal-utf-8?))
+    (define b (read-byte port))
+    (if (< b 128)
+        (interpret-ascii-code b)
+        (add-modifier 'meta (interpret-ascii-code (- b 128))))]
+   [else
+    (define b (char->integer (read-char port)))
+    (if (< b 128)
+        (interpret-ascii-code b)
+        (simple-key (integer->char b)))]))