Splay ropes
This commit is contained in:
parent
ca66ff7a51
commit
d2bf229757
|
@ -0,0 +1,382 @@
|
|||
#lang racket/base
|
||||
;; Ropes for text editing
|
||||
|
||||
(provide empty-strand
|
||||
string->strand
|
||||
strand->string
|
||||
substrand
|
||||
strand-equal?
|
||||
strand-empty?
|
||||
|
||||
empty-rope
|
||||
strand->rope
|
||||
string->rope
|
||||
rope->string
|
||||
|
||||
rope-size
|
||||
rope-marks
|
||||
rope-split
|
||||
rope-append
|
||||
rope-concat
|
||||
subrope
|
||||
|
||||
has-mark?
|
||||
lookup-mark
|
||||
set-mark
|
||||
clear-mark
|
||||
update-mark)
|
||||
|
||||
(require racket/set)
|
||||
(require racket/match)
|
||||
|
||||
(module+ test (require rackunit racket/pretty))
|
||||
|
||||
;; A Mark is a Symbol.
|
||||
|
||||
;; A Strand is a (strand String Number Number), representing a
|
||||
;; substring of a string.
|
||||
(struct strand (text offset count) #:transparent)
|
||||
|
||||
;; A Rope is a splay tree representing a long piece of text.
|
||||
;; #f is the empty Rope; otherwise a (rope) struct instance.
|
||||
;; INVARIANT: Adjacent ropes will be merged to maximize sharing.
|
||||
(struct rope (strand ;; Strand
|
||||
left ;; Rope or #f
|
||||
right ;; Rope or #f
|
||||
size* ;; Number, total length of this rope
|
||||
marks* ;; Set of Mark
|
||||
mark-index ;; Hashtable from Mark to Number, marks in this span
|
||||
) #:transparent)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; Strands
|
||||
|
||||
(define *glom-up-to* 128)
|
||||
|
||||
(define (empty-strand) (strand "" 0 0))
|
||||
|
||||
(define (string->strand s)
|
||||
(strand s 0 (string-length s)))
|
||||
|
||||
(define (strand->string t)
|
||||
(match-define (strand text offset count) t)
|
||||
(if (and (zero? offset) (= count (string-length text)))
|
||||
text
|
||||
(substring text offset (+ offset count))))
|
||||
|
||||
(define (substrand t0 [lo0 #f] [hi0 #f])
|
||||
(define t (if (string? t0) (string->strand t0) t0))
|
||||
(define lo (if (not lo0)
|
||||
0
|
||||
(modulo lo0 (strand-count t))))
|
||||
(define hi (if (not hi0)
|
||||
(strand-count t)
|
||||
(modulo hi0 (strand-count t))))
|
||||
(strand (strand-text t)
|
||||
(+ (strand-offset t) lo)
|
||||
(- hi lo)))
|
||||
|
||||
(define (strand-maybe-append t1 t2)
|
||||
(match-define (strand text1 offset1 count1) t1)
|
||||
(match-define (strand text2 offset2 count2) t2)
|
||||
(or (and (zero? count1) t2)
|
||||
(and (zero? count2) t1)
|
||||
(and (eq? text1 text2)
|
||||
(= (+ offset1 count1) offset2)
|
||||
(strand text1 offset1 (+ count1 count2)))
|
||||
;; TODO: measure to see if the following improves or worsens memory usage
|
||||
(and (< (+ count1 count2) *glom-up-to*)
|
||||
(string->strand (string-append (strand->string t1) (strand->string t2))))))
|
||||
|
||||
(define (strand-equal? t1 t2)
|
||||
(string=? (strand->string t1)
|
||||
(strand->string t2)))
|
||||
|
||||
(define (strand-empty? t)
|
||||
(zero? (strand-count t)))
|
||||
|
||||
(module+ test
|
||||
(check-equal? (strand-count (empty-strand)) 0)
|
||||
(check-equal? (strand-count (string->strand "")) 0)
|
||||
(check-true (strand-equal? (empty-strand) (string->strand ""))))
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; Ropes
|
||||
|
||||
(define (empty-rope) #f)
|
||||
|
||||
(define (strand->rope t)
|
||||
(rope t #f #f (strand-count t) (set) (hash)))
|
||||
|
||||
(define (string->rope s)
|
||||
(strand->rope (string->strand s)))
|
||||
|
||||
(define (rope->string r)
|
||||
(define buf (make-string (rope-size r)))
|
||||
(let fill! ((r r) (offset 0))
|
||||
(when r
|
||||
(fill! (rope-left r) offset)
|
||||
(define lo (rope-lo r))
|
||||
(define s (rope-strand r))
|
||||
(string-copy! buf
|
||||
(+ offset lo)
|
||||
(strand-text s)
|
||||
(strand-offset s)
|
||||
(+ (strand-offset s) (strand-count s)))
|
||||
(fill! (rope-right r) (+ offset lo (strand-count s)))))
|
||||
buf)
|
||||
|
||||
(define (replace-left r n) (if r (reindex (struct-copy rope r [left n])) n))
|
||||
(define (replace-right r n) (if r (reindex (struct-copy rope r [right n])) n))
|
||||
(define (replace-both r rl rr) (reindex (struct-copy rope r [left rl] [right rr])))
|
||||
|
||||
(define (splay-to r direction-finder arg0)
|
||||
;; zig: last. desired position is a direct (left/right) child of r.
|
||||
;; zig-zig: desired position is within a (left-left/right-right) grandchild of r.
|
||||
;; zig-zag: desired position is within a (left-right/right-left) grandchild of r.
|
||||
(define-values (where arg1) (direction-finder arg0 r))
|
||||
(match where
|
||||
['here (values arg1 r)]
|
||||
['left
|
||||
(define rl (rope-left r))
|
||||
(define-values (where arg2) (direction-finder arg1 rl))
|
||||
(match where
|
||||
['here ;; zig.
|
||||
(values arg2 (replace-right rl (replace-left r (and rl (rope-right rl)))))]
|
||||
['left ;; zig-zig
|
||||
(define-values (v rll) (splay-to (rope-left rl) direction-finder arg2))
|
||||
(values v (replace-right rll (replace-both rl
|
||||
(and rll (rope-right rll))
|
||||
(replace-left r (rope-right rl)))))]
|
||||
['right ;; zig-zag
|
||||
(define-values (v rlr) (splay-to (rope-right rl) direction-finder arg2))
|
||||
(values v (replace-both rlr
|
||||
(replace-right rl (rope-left rlr))
|
||||
(replace-left r (rope-right rlr))))])]
|
||||
['right
|
||||
(define rr (rope-right r))
|
||||
(define-values (where arg2) (direction-finder arg1 rr))
|
||||
(match where
|
||||
['here ;; zig.
|
||||
(values arg2 (replace-left rr (replace-right r (and rr (rope-left rr)))))]
|
||||
['left ;; zig-zag
|
||||
(define-values (v rrl) (splay-to (rope-left rr) direction-finder arg2))
|
||||
(values v (replace-both rrl
|
||||
(replace-right r (rope-left rrl))
|
||||
(replace-left rr (rope-right rrl))))]
|
||||
['right ;; zig-zig
|
||||
(define-values (v rrr) (splay-to (rope-right rr) direction-finder arg2))
|
||||
(values v (replace-left rrr (replace-both rr
|
||||
(replace-right r (rope-left rr))
|
||||
(and rrr (rope-left rrr)))))])]))
|
||||
|
||||
(define (rope-lo r)
|
||||
(rope-size (rope-left r)))
|
||||
|
||||
(define (rope-lo+hi r)
|
||||
(define lo (rope-lo r))
|
||||
(values lo (+ lo (strand-count (rope-strand r)))))
|
||||
|
||||
(define (find-position pos r)
|
||||
(if (not r)
|
||||
(values 'here (zero? pos))
|
||||
(let-values (((lo hi) (rope-lo+hi r)))
|
||||
(cond
|
||||
[(< pos lo) (values 'left pos)]
|
||||
[(< pos hi) (values 'here #t)]
|
||||
[else (values 'right (- pos hi))]))))
|
||||
|
||||
(define (find-mark mark)
|
||||
(define (walk offset r)
|
||||
(cond
|
||||
[(not r)
|
||||
(values 'here #f)]
|
||||
[(hash-ref (rope-mark-index r) mark #f) =>
|
||||
(lambda (p) (values 'here (+ offset (rope-lo r) p)))]
|
||||
[(set-member? (rope-marks (rope-left r)) mark)
|
||||
(values 'left offset)]
|
||||
[(set-member? (rope-marks (rope-right r)) mark)
|
||||
(values 'right (+ offset (rope-lo r) (strand-count (rope-strand r))))]
|
||||
[else
|
||||
(values 'here #f)]))
|
||||
;; (trace walk)
|
||||
walk)
|
||||
|
||||
(define (has-mark? r mark)
|
||||
(and r (set-member? (rope-marks r) mark)))
|
||||
|
||||
(define (lookup-mark r mark)
|
||||
(splay-to r (find-mark mark) 0))
|
||||
|
||||
(define (set-mark r0 mark position)
|
||||
(when (set-member? (rope-marks r0) mark) (error 'set-mark "Duplicate mark: ~a" mark))
|
||||
(define-values (found? r1) (splay-to r0 find-position position))
|
||||
(when (not found?) (error 'set-mark "Invalid position ~a setting mark ~a" position mark))
|
||||
(reindex
|
||||
(if (not r1)
|
||||
(rope (empty-strand) #f #f 'will-be-recomputed 'will-be-recomputed (hash mark position))
|
||||
(struct-copy rope r1
|
||||
[mark-index (hash-set (rope-mark-index r1) mark (- position (rope-lo r1)))]))))
|
||||
|
||||
(define (clear-mark r0 mark)
|
||||
(define-values (old-pos r) (lookup-mark r0 mark))
|
||||
(if old-pos
|
||||
(struct-copy rope r
|
||||
[marks* (set-remove (rope-marks* r) mark)]
|
||||
[mark-index (hash-remove (rope-mark-index r) mark)])
|
||||
r))
|
||||
|
||||
(define (update-mark r0 mark position)
|
||||
(set-mark (if (has-mark? r0 mark)
|
||||
(clear-mark r0 mark)
|
||||
r0)
|
||||
mark
|
||||
position))
|
||||
|
||||
(define (rope-size r)
|
||||
(if r (rope-size* r) 0))
|
||||
|
||||
(define (rope-marks r)
|
||||
(if r (rope-marks* r) (set)))
|
||||
|
||||
(define (reindex r)
|
||||
(struct-copy rope r
|
||||
[size* (+ (rope-size (rope-left r))
|
||||
(rope-size (rope-right r))
|
||||
(strand-count (rope-strand r)))]
|
||||
[marks* (set-union (rope-marks (rope-left r))
|
||||
(rope-marks (rope-right r))
|
||||
(list->set (hash-keys (rope-mark-index r))))]))
|
||||
|
||||
(define (rope-split r0 position)
|
||||
(define-values (found? r) (splay-to r0 find-position position))
|
||||
(when (not found?) (error 'rope-split "Invalid position ~a" position))
|
||||
;; We know the position is in the root of r.
|
||||
(match-define (rope t rl rr size marks mark-index) r)
|
||||
(define-values (lo hi) (rope-lo+hi r))
|
||||
(define offset (- position lo))
|
||||
(define-values (left-index marks-at-split right-index)
|
||||
(partition-mark-index mark-index offset))
|
||||
(define left-strand (substrand t 0 offset))
|
||||
(define right-strand (substrand t offset))
|
||||
(values (if (strand-empty? left-strand)
|
||||
rl
|
||||
(reindex (rope left-strand rl #f 'will-be-recomputed (set) left-index)))
|
||||
marks-at-split
|
||||
(if (strand-empty? right-strand)
|
||||
rr
|
||||
(reindex (rope right-strand #f rr 'will-be-recomputed (set) right-index)))))
|
||||
|
||||
(define (partition-mark-index index offset)
|
||||
(for/fold [(l (hash)) (m (set)) (r (hash))]
|
||||
[((mark pos) (in-hash index))]
|
||||
(values (if (< pos offset) (hash-set l mark pos) l)
|
||||
(if (= pos offset) (set-add m mark) m)
|
||||
(if (> pos offset) (hash-set r mark (- pos offset)) r))))
|
||||
|
||||
(define (rope-append rl0 rr0)
|
||||
(cond
|
||||
[(not rl0) rr0]
|
||||
[(not rr0) rl0]
|
||||
[else
|
||||
(define-values (_l rl) (splay-to rl0 find-position (rope-size rl0)))
|
||||
(define-values (_r rr) (splay-to rr0 find-position 0))
|
||||
;; Both rl's right and rr's left are #f.
|
||||
(define t (strand-maybe-append (rope-strand rl) (rope-strand rr)))
|
||||
(if t
|
||||
(let ((merged-index (merge-mark-indexes (rope-mark-index rl)
|
||||
(rope-mark-index rr)
|
||||
(strand-count (rope-strand rl)))))
|
||||
(reindex (rope t (rope-left rl) (rope-right rr) 'will-be-recomputed (set) merged-index)))
|
||||
(replace-right rl rr))]))
|
||||
|
||||
(define (rope-concat rs)
|
||||
(foldr rope-append (empty-rope) rs))
|
||||
|
||||
(define (merge-mark-indexes li ri offset)
|
||||
(for/fold [(i li)] [((k v) (in-hash ri))]
|
||||
(if (hash-has-key? i k)
|
||||
(error 'merge-mark-indexes "Duplicate mark: ~a" k)
|
||||
(hash-set i k (+ offset v)))))
|
||||
|
||||
(define (subrope* r0 [lo0 #f] [hi0 #f])
|
||||
(define lo (if (not lo0)
|
||||
0
|
||||
(modulo lo0 (rope-size r0))))
|
||||
(define hi (if (not hi0)
|
||||
(rope-size r0)
|
||||
(modulo hi0 (rope-size r0))))
|
||||
(define-values (_l left-marks-at-split mr) (rope-split r0 lo))
|
||||
(define-values (m right-marks-at-split _r) (rope-split mr (- hi lo)))
|
||||
(values left-marks-at-split m right-marks-at-split))
|
||||
|
||||
(define (subrope r0 [lo0 #f] [hi0 #f])
|
||||
(define-values (_l m _r) (subrope* r0 lo0 hi0))
|
||||
m)
|
||||
|
||||
;; (require racket/trace)
|
||||
;; (trace splay-to find-position rope-concat rope-append rope-split rope->string)
|
||||
|
||||
(module+ test
|
||||
(require (only-in racket/string string-append*))
|
||||
|
||||
(check-equal? (rope-size (empty-rope)) 0)
|
||||
|
||||
(define (test-with-pieces string-pieces)
|
||||
(define rope-pieces (map string->rope string-pieces))
|
||||
(define text (string-append* string-pieces))
|
||||
(check-equal? (rope->string (car rope-pieces)) (car string-pieces))
|
||||
(check-equal? (rope->string (rope-concat rope-pieces)) text)
|
||||
(check-equal? (rope-size (rope-concat rope-pieces)) (string-length text))
|
||||
|
||||
(check-eq? (rope-append (empty-rope) (car rope-pieces)) (car rope-pieces))
|
||||
(check-eq? (rope-append (car rope-pieces) (empty-rope)) (car rope-pieces))
|
||||
|
||||
(let loop ((n 1000) (r0 (rope-concat rope-pieces)))
|
||||
(when (positive? n)
|
||||
(define pos (random (+ (rope-size r0) 1)))
|
||||
;; (pretty-print (list pos r0))
|
||||
(define-values (found? r) (splay-to r0 find-position pos))
|
||||
(check-true found?)
|
||||
(check-equal? (rope->string r) text)
|
||||
(loop (- n 1) r)))
|
||||
|
||||
(let*-values (((r) (set-mark (rope-concat rope-pieces) 'mark 9))
|
||||
((_) (check-equal? (rope->string r) text))
|
||||
((pos r) (lookup-mark r 'mark))
|
||||
((_) (check-equal? pos 9))
|
||||
((r) (clear-mark r 'mark))
|
||||
((pos r) (lookup-mark r 'mark))
|
||||
((_) (check-false pos))
|
||||
((r) (update-mark r 'mark 9))
|
||||
((pos r) (lookup-mark r 'mark))
|
||||
((_) (check-equal? pos 9))
|
||||
((r) (update-mark r 'mark 6))
|
||||
((pos r) (lookup-mark r 'mark))
|
||||
((_) (check-equal? pos 6))
|
||||
((l ms r) (rope-split r pos))
|
||||
((_) (check-equal? ms (set 'mark)))
|
||||
((_) (check-equal? (rope->string l) (substring text 0 6)))
|
||||
((_) (check-equal? (rope->string r) (substring text 6 (string-length text))))
|
||||
((l ms r) (rope-split r 3))
|
||||
((_) (check-equal? ms (set)))
|
||||
((_) (check-equal? (rope->string l) (substring text 6 9)))
|
||||
((_) (check-equal? (rope->string r) (substring text 9 (string-length text)))))
|
||||
(void)))
|
||||
|
||||
(define prejudice-pieces
|
||||
(list "It is a truth universally acknowledged, that a single man in possession of a good fortune must be in want of a wife.\n"
|
||||
"\n"
|
||||
"However little known the feelings or views of such a man may be on his first entering a neighbourhood, this truth is so well fixed in the minds of the surrounding families, that he is considered as the rightful property of some one or other of their daughters.\n"
|
||||
"\n"
|
||||
"``My dear Mr. Bennet,'' said his lady to him one day, ``have you heard that Netherfield Park is let at last?''\n"
|
||||
"\n"
|
||||
"Mr. Bennet replied that he had not.\n"))
|
||||
|
||||
(define (atomize-pieces pieces)
|
||||
(map string (string->list (string-append* pieces))))
|
||||
|
||||
(test-with-pieces (list "hello" ", " "world"))
|
||||
(test-with-pieces prejudice-pieces)
|
||||
(test-with-pieces (atomize-pieces prejudice-pieces))
|
||||
)
|
Loading…
Reference in New Issue