From d2bf2297578f929951c65ac0cc5bffb85ba7f002 Mon Sep 17 00:00:00 2001 From: Tony Garnock-Jones Date: Mon, 15 Dec 2014 01:29:48 -0500 Subject: [PATCH] Splay ropes --- rope.rkt | 382 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 382 insertions(+) create mode 100644 rope.rkt diff --git a/rope.rkt b/rope.rkt new file mode 100644 index 0000000..ca8210c --- /dev/null +++ b/rope.rkt @@ -0,0 +1,382 @@ +#lang racket/base +;; Ropes for text editing + +(provide empty-strand + string->strand + strand->string + substrand + strand-equal? + strand-empty? + + empty-rope + strand->rope + string->rope + rope->string + + rope-size + rope-marks + rope-split + rope-append + rope-concat + subrope + + has-mark? + lookup-mark + set-mark + clear-mark + update-mark) + +(require racket/set) +(require racket/match) + +(module+ test (require rackunit racket/pretty)) + +;; A Mark is a Symbol. + +;; A Strand is a (strand String Number Number), representing a +;; substring of a string. +(struct strand (text offset count) #:transparent) + +;; A Rope is a splay tree representing a long piece of text. +;; #f is the empty Rope; otherwise a (rope) struct instance. +;; INVARIANT: Adjacent ropes will be merged to maximize sharing. +(struct rope (strand ;; Strand + left ;; Rope or #f + right ;; Rope or #f + size* ;; Number, total length of this rope + marks* ;; Set of Mark + mark-index ;; Hashtable from Mark to Number, marks in this span + ) #:transparent) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Strands + +(define *glom-up-to* 128) + +(define (empty-strand) (strand "" 0 0)) + +(define (string->strand s) + (strand s 0 (string-length s))) + +(define (strand->string t) + (match-define (strand text offset count) t) + (if (and (zero? offset) (= count (string-length text))) + text + (substring text offset (+ offset count)))) + +(define (substrand t0 [lo0 #f] [hi0 #f]) + (define t (if (string? t0) (string->strand t0) t0)) + (define lo (if (not lo0) + 0 + (modulo lo0 (strand-count t)))) + (define hi (if (not hi0) + (strand-count t) + (modulo hi0 (strand-count t)))) + (strand (strand-text t) + (+ (strand-offset t) lo) + (- hi lo))) + +(define (strand-maybe-append t1 t2) + (match-define (strand text1 offset1 count1) t1) + (match-define (strand text2 offset2 count2) t2) + (or (and (zero? count1) t2) + (and (zero? count2) t1) + (and (eq? text1 text2) + (= (+ offset1 count1) offset2) + (strand text1 offset1 (+ count1 count2))) + ;; TODO: measure to see if the following improves or worsens memory usage + (and (< (+ count1 count2) *glom-up-to*) + (string->strand (string-append (strand->string t1) (strand->string t2)))))) + +(define (strand-equal? t1 t2) + (string=? (strand->string t1) + (strand->string t2))) + +(define (strand-empty? t) + (zero? (strand-count t))) + +(module+ test + (check-equal? (strand-count (empty-strand)) 0) + (check-equal? (strand-count (string->strand "")) 0) + (check-true (strand-equal? (empty-strand) (string->strand "")))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Ropes + +(define (empty-rope) #f) + +(define (strand->rope t) + (rope t #f #f (strand-count t) (set) (hash))) + +(define (string->rope s) + (strand->rope (string->strand s))) + +(define (rope->string r) + (define buf (make-string (rope-size r))) + (let fill! ((r r) (offset 0)) + (when r + (fill! (rope-left r) offset) + (define lo (rope-lo r)) + (define s (rope-strand r)) + (string-copy! buf + (+ offset lo) + (strand-text s) + (strand-offset s) + (+ (strand-offset s) (strand-count s))) + (fill! (rope-right r) (+ offset lo (strand-count s))))) + buf) + +(define (replace-left r n) (if r (reindex (struct-copy rope r [left n])) n)) +(define (replace-right r n) (if r (reindex (struct-copy rope r [right n])) n)) +(define (replace-both r rl rr) (reindex (struct-copy rope r [left rl] [right rr]))) + +(define (splay-to r direction-finder arg0) + ;; zig: last. desired position is a direct (left/right) child of r. + ;; zig-zig: desired position is within a (left-left/right-right) grandchild of r. + ;; zig-zag: desired position is within a (left-right/right-left) grandchild of r. + (define-values (where arg1) (direction-finder arg0 r)) + (match where + ['here (values arg1 r)] + ['left + (define rl (rope-left r)) + (define-values (where arg2) (direction-finder arg1 rl)) + (match where + ['here ;; zig. + (values arg2 (replace-right rl (replace-left r (and rl (rope-right rl)))))] + ['left ;; zig-zig + (define-values (v rll) (splay-to (rope-left rl) direction-finder arg2)) + (values v (replace-right rll (replace-both rl + (and rll (rope-right rll)) + (replace-left r (rope-right rl)))))] + ['right ;; zig-zag + (define-values (v rlr) (splay-to (rope-right rl) direction-finder arg2)) + (values v (replace-both rlr + (replace-right rl (rope-left rlr)) + (replace-left r (rope-right rlr))))])] + ['right + (define rr (rope-right r)) + (define-values (where arg2) (direction-finder arg1 rr)) + (match where + ['here ;; zig. + (values arg2 (replace-left rr (replace-right r (and rr (rope-left rr)))))] + ['left ;; zig-zag + (define-values (v rrl) (splay-to (rope-left rr) direction-finder arg2)) + (values v (replace-both rrl + (replace-right r (rope-left rrl)) + (replace-left rr (rope-right rrl))))] + ['right ;; zig-zig + (define-values (v rrr) (splay-to (rope-right rr) direction-finder arg2)) + (values v (replace-left rrr (replace-both rr + (replace-right r (rope-left rr)) + (and rrr (rope-left rrr)))))])])) + +(define (rope-lo r) + (rope-size (rope-left r))) + +(define (rope-lo+hi r) + (define lo (rope-lo r)) + (values lo (+ lo (strand-count (rope-strand r))))) + +(define (find-position pos r) + (if (not r) + (values 'here (zero? pos)) + (let-values (((lo hi) (rope-lo+hi r))) + (cond + [(< pos lo) (values 'left pos)] + [(< pos hi) (values 'here #t)] + [else (values 'right (- pos hi))])))) + +(define (find-mark mark) + (define (walk offset r) + (cond + [(not r) + (values 'here #f)] + [(hash-ref (rope-mark-index r) mark #f) => + (lambda (p) (values 'here (+ offset (rope-lo r) p)))] + [(set-member? (rope-marks (rope-left r)) mark) + (values 'left offset)] + [(set-member? (rope-marks (rope-right r)) mark) + (values 'right (+ offset (rope-lo r) (strand-count (rope-strand r))))] + [else + (values 'here #f)])) + ;; (trace walk) + walk) + +(define (has-mark? r mark) + (and r (set-member? (rope-marks r) mark))) + +(define (lookup-mark r mark) + (splay-to r (find-mark mark) 0)) + +(define (set-mark r0 mark position) + (when (set-member? (rope-marks r0) mark) (error 'set-mark "Duplicate mark: ~a" mark)) + (define-values (found? r1) (splay-to r0 find-position position)) + (when (not found?) (error 'set-mark "Invalid position ~a setting mark ~a" position mark)) + (reindex + (if (not r1) + (rope (empty-strand) #f #f 'will-be-recomputed 'will-be-recomputed (hash mark position)) + (struct-copy rope r1 + [mark-index (hash-set (rope-mark-index r1) mark (- position (rope-lo r1)))])))) + +(define (clear-mark r0 mark) + (define-values (old-pos r) (lookup-mark r0 mark)) + (if old-pos + (struct-copy rope r + [marks* (set-remove (rope-marks* r) mark)] + [mark-index (hash-remove (rope-mark-index r) mark)]) + r)) + +(define (update-mark r0 mark position) + (set-mark (if (has-mark? r0 mark) + (clear-mark r0 mark) + r0) + mark + position)) + +(define (rope-size r) + (if r (rope-size* r) 0)) + +(define (rope-marks r) + (if r (rope-marks* r) (set))) + +(define (reindex r) + (struct-copy rope r + [size* (+ (rope-size (rope-left r)) + (rope-size (rope-right r)) + (strand-count (rope-strand r)))] + [marks* (set-union (rope-marks (rope-left r)) + (rope-marks (rope-right r)) + (list->set (hash-keys (rope-mark-index r))))])) + +(define (rope-split r0 position) + (define-values (found? r) (splay-to r0 find-position position)) + (when (not found?) (error 'rope-split "Invalid position ~a" position)) + ;; We know the position is in the root of r. + (match-define (rope t rl rr size marks mark-index) r) + (define-values (lo hi) (rope-lo+hi r)) + (define offset (- position lo)) + (define-values (left-index marks-at-split right-index) + (partition-mark-index mark-index offset)) + (define left-strand (substrand t 0 offset)) + (define right-strand (substrand t offset)) + (values (if (strand-empty? left-strand) + rl + (reindex (rope left-strand rl #f 'will-be-recomputed (set) left-index))) + marks-at-split + (if (strand-empty? right-strand) + rr + (reindex (rope right-strand #f rr 'will-be-recomputed (set) right-index))))) + +(define (partition-mark-index index offset) + (for/fold [(l (hash)) (m (set)) (r (hash))] + [((mark pos) (in-hash index))] + (values (if (< pos offset) (hash-set l mark pos) l) + (if (= pos offset) (set-add m mark) m) + (if (> pos offset) (hash-set r mark (- pos offset)) r)))) + +(define (rope-append rl0 rr0) + (cond + [(not rl0) rr0] + [(not rr0) rl0] + [else + (define-values (_l rl) (splay-to rl0 find-position (rope-size rl0))) + (define-values (_r rr) (splay-to rr0 find-position 0)) + ;; Both rl's right and rr's left are #f. + (define t (strand-maybe-append (rope-strand rl) (rope-strand rr))) + (if t + (let ((merged-index (merge-mark-indexes (rope-mark-index rl) + (rope-mark-index rr) + (strand-count (rope-strand rl))))) + (reindex (rope t (rope-left rl) (rope-right rr) 'will-be-recomputed (set) merged-index))) + (replace-right rl rr))])) + +(define (rope-concat rs) + (foldr rope-append (empty-rope) rs)) + +(define (merge-mark-indexes li ri offset) + (for/fold [(i li)] [((k v) (in-hash ri))] + (if (hash-has-key? i k) + (error 'merge-mark-indexes "Duplicate mark: ~a" k) + (hash-set i k (+ offset v))))) + +(define (subrope* r0 [lo0 #f] [hi0 #f]) + (define lo (if (not lo0) + 0 + (modulo lo0 (rope-size r0)))) + (define hi (if (not hi0) + (rope-size r0) + (modulo hi0 (rope-size r0)))) + (define-values (_l left-marks-at-split mr) (rope-split r0 lo)) + (define-values (m right-marks-at-split _r) (rope-split mr (- hi lo))) + (values left-marks-at-split m right-marks-at-split)) + +(define (subrope r0 [lo0 #f] [hi0 #f]) + (define-values (_l m _r) (subrope* r0 lo0 hi0)) + m) + +;; (require racket/trace) +;; (trace splay-to find-position rope-concat rope-append rope-split rope->string) + +(module+ test + (require (only-in racket/string string-append*)) + + (check-equal? (rope-size (empty-rope)) 0) + + (define (test-with-pieces string-pieces) + (define rope-pieces (map string->rope string-pieces)) + (define text (string-append* string-pieces)) + (check-equal? (rope->string (car rope-pieces)) (car string-pieces)) + (check-equal? (rope->string (rope-concat rope-pieces)) text) + (check-equal? (rope-size (rope-concat rope-pieces)) (string-length text)) + + (check-eq? (rope-append (empty-rope) (car rope-pieces)) (car rope-pieces)) + (check-eq? (rope-append (car rope-pieces) (empty-rope)) (car rope-pieces)) + + (let loop ((n 1000) (r0 (rope-concat rope-pieces))) + (when (positive? n) + (define pos (random (+ (rope-size r0) 1))) + ;; (pretty-print (list pos r0)) + (define-values (found? r) (splay-to r0 find-position pos)) + (check-true found?) + (check-equal? (rope->string r) text) + (loop (- n 1) r))) + + (let*-values (((r) (set-mark (rope-concat rope-pieces) 'mark 9)) + ((_) (check-equal? (rope->string r) text)) + ((pos r) (lookup-mark r 'mark)) + ((_) (check-equal? pos 9)) + ((r) (clear-mark r 'mark)) + ((pos r) (lookup-mark r 'mark)) + ((_) (check-false pos)) + ((r) (update-mark r 'mark 9)) + ((pos r) (lookup-mark r 'mark)) + ((_) (check-equal? pos 9)) + ((r) (update-mark r 'mark 6)) + ((pos r) (lookup-mark r 'mark)) + ((_) (check-equal? pos 6)) + ((l ms r) (rope-split r pos)) + ((_) (check-equal? ms (set 'mark))) + ((_) (check-equal? (rope->string l) (substring text 0 6))) + ((_) (check-equal? (rope->string r) (substring text 6 (string-length text)))) + ((l ms r) (rope-split r 3)) + ((_) (check-equal? ms (set))) + ((_) (check-equal? (rope->string l) (substring text 6 9))) + ((_) (check-equal? (rope->string r) (substring text 9 (string-length text))))) + (void))) + + (define prejudice-pieces + (list "It is a truth universally acknowledged, that a single man in possession of a good fortune must be in want of a wife.\n" + "\n" + "However little known the feelings or views of such a man may be on his first entering a neighbourhood, this truth is so well fixed in the minds of the surrounding families, that he is considered as the rightful property of some one or other of their daughters.\n" + "\n" + "``My dear Mr. Bennet,'' said his lady to him one day, ``have you heard that Netherfield Park is let at last?''\n" + "\n" + "Mr. Bennet replied that he had not.\n")) + + (define (atomize-pieces pieces) + (map string (string->list (string-append* pieces)))) + + (test-with-pieces (list "hello" ", " "world")) + (test-with-pieces prejudice-pieces) + (test-with-pieces (atomize-pieces prejudice-pieces)) + )