diff --git a/crossfire/codegen.template b/crossfire/codegen.template index d2926c0..3b36478 100644 --- a/crossfire/codegen.template +++ b/crossfire/codegen.template @@ -1,35 +1,56 @@ +@; vim: ft=c #include #include #include +#include -@(define (output-charset num cset) - @list{ - unsigned char @(format "cset~a" num) [] = { - @(string-join (map number->string cset) ",") - }; - size_t @(format "cset~a_size" num) = @(length cset); +@(define c-type "uint64_t") +@(define c-type-fmt "%lx") + +typedef @c-type vartype; + +typedef struct { + vartype start; + vartype end; +} interval; + +typedef struct { + size_t length; + interval values[]; +} iset; + +@(define (output-iset num iset) + (define name (format "iset~a" num)) + (define len (length iset)) + @list{iset @name = @"{" @len @"," @"{" @(add-between + (for/list ([ival (in-list iset)]) + @list{@"{" @car[ival] @"," @cdr[ival] @"}"}) + ",") @"}}" ; }) -@(for/list ([i (in-naturals)] [cset (in-vector pattern)]) - (output-charset i cset)) +@(for/list ([i (in-naturals)] [iset (in-vector pattern)]) + (output-iset i iset)) int main() { - char buf [ @(+ 2 (vector-length pattern)) ]; - buf [ @(vector-length pattern) ] = '\n'; - buf [ @(add1 (vector-length pattern)) ] = '\0'; + char buf [ @(* 20 (vector-length pattern)) ]; @(for/list ([num (in-naturals)] [cset (in-vector pattern)]) (define iter (format "i~a" num)) - (define cset (format "cset~a" num)) - (define csetsize (format "cset~a_size" num)) + (define viter (format "v~a" num)) + (define iset (format "iset~a" num)) @list{ - for (size_t @iter = 0; @iter < @csetsize ; @iter ++) @"{" - @"buf[" @num @"]" = @cset @"[" @iter @"];" - @"\n" + for (size_t @iter = 0; @iter < @iset @".length" ; @iter ++) @"{" + for (vartype @viter = @iset .values[ @iter ].start; @viter < @iset .values[ @iter ].end; @viter ++) @"{" + }) - fwrite(buf, 1, @(add1 (vector-length pattern)), stdout); + @(define vs + (string-join (for/list ([i (in-range (vector-length pattern))]) (format "v~a" i)) ",")) + @(define fmt + (string-join (for/list ([i (in-range (vector-length pattern))]) c-type-fmt) " ")) - @(for/list ([cset (in-vector pattern)]) "}") + ssize_t res = snprintf(buf, sizeof(buf), @(format "\"~a\\n\"" fmt), @vs ); + fwrite(buf, res, 1, stdout); + + @(for/list ([cset (in-vector pattern)]) "}}") } - diff --git a/crossfire/main.rkt b/crossfire/main.rkt index 0934f28..3fd434c 100644 --- a/crossfire/main.rkt +++ b/crossfire/main.rkt @@ -37,23 +37,58 @@ ;; manifest.rkt processing -(define (string->charset str) - (for/set ([ch (in-string str)]) - (char->integer ch))) +(define (make-iset) '()) -(define (range->charset a b) - (for/set ([i (in-range a b)]) i)) +(define (iset-add iset start end) + (when (> start end) + (error "invalid interval" start end)) -(define builtin-charsets - (hash "?l" (string->charset "abcdefghijklmnopqrstuvwxyz") - "?u" (string->charset "ABCDEFGHIJKLMNOPQRSTUVWXYZ") - "?d" (string->charset "0123456789") - "?s" (string->charset " !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~") - "?a" (range->charset 32 127) - "?b" (range->charset 0 256))) + (define (iset-merge iset) + (define (iset-merge-helper iset acc) + (match iset + ['() (reverse acc)] + [(cons (cons fs fe) r) + (match-define (cons (cons afs afe) ar) acc) + (if (<= fs afe) + (iset-merge-helper r (cons (cons afs (max fe afe)) ar)) + (iset-merge-helper r (cons (cons fs fe) acc)))])) + + (if (empty? iset) + iset + (iset-merge-helper (rest iset) (list (first iset))))) + + (define (iset-insert iset start end) + (match iset + ['() (list (cons start end))] + [(cons (cons fs fe) r) + (if (>= fs start) + (cons (cons start end) iset) + (cons (cons fs fe) (iset-insert r start end)))])) + + (iset-merge (iset-insert iset start end))) + +(define (char->iset ch) + (define cp (char->integer ch)) + (iset-add (make-iset) cp (add1 cp))) + +(define (string->iset str) + (for/fold ([iset (make-iset)]) ([ch (in-string str)]) + (define cp (char->integer ch)) + (iset-add iset cp (add1 cp)))) + +(define (range->iset a b) + (iset-add (make-iset) a b)) + +(define builtin-isets + (hash "?l" (string->iset "abcdefghijklmnopqrstuvwxyz") + "?u" (string->iset "ABCDEFGHIJKLMNOPQRSTUVWXYZ") + "?d" (string->iset "0123456789") + "?s" (string->iset " !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~") + "?a" (range->iset 32 127) + "?b" (range->iset 0 256))) (define (parse-manifest manifest-def) - (struct manifest [name mode command charsets pattern] #:transparent) + (struct manifest [name mode command isets pattern] #:transparent) (define (eq/m x) (curry equal? x)) @@ -66,36 +101,40 @@ (error "manifest attribute missing:" idsym)))])) (define mf - (for/fold ([mf (manifest #f 'stdin #f builtin-charsets #f)]) + (for/fold ([mf (manifest #f 'stdin #f builtin-isets #f)]) ([line (in-list manifest-def)]) (match line [(list (? (eq/m 'name)) name) (struct-copy manifest mf [name name])] [(list (? (eq/m 'mode)) mode) (struct-copy manifest mf [mode mode])] [(list (? (eq/m 'command)) command) (struct-copy manifest mf [command command])] - [(list (? (eq/m 'charset)) name val) + [(list (? (eq/m 'iset)) name val) (struct-copy manifest mf - [charsets (hash-set (manifest-charsets mf) - (symbol->string name) (string->charset val))])] + [isets (hash-set (manifest-isets mf) + (symbol->string name) (string->iset val))])] [(list (? (eq/m 'pattern)) pattern) (struct-copy manifest mf [pattern pattern])]))) (check-false name) (check-false command) (check-false pattern) - (define charsets (manifest-charsets mf)) + (define isets (manifest-isets mf)) (define pattern (for/vector ([x (in-list (regexp-match* #rx"\\??." (manifest-pattern mf)))]) (if (= 1 (string-length x)) - (list (char->integer (string-ref x 0))) - (set->list (hash-ref charsets x (lambda () (error "no such charset defined" x))))))) + (char->iset (string-ref x 0)) + (hash-ref isets x (lambda () (error "no such iset defined" x)))))) (values (manifest-name mf) (manifest-mode mf) (manifest-command mf) pattern)) ;; pattern processing +(define (iset-count iset) + (for/sum ([ival (in-list iset)]) + (- (cdr ival) (car ival)))) + (define (pattern-count pattern) (for/fold ([sum 1]) ([p (in-vector pattern)]) - (* sum (set-count p)))) + (* sum (iset-count p)))) ;; design recipe violations follow ;; (sorry) @@ -124,7 +163,8 @@ '((name "test") (mode stdin) (command ("meme")) - (pattern "test?d?a?a?a")))) + (pattern "test?d?a?a?s")))) ; (pattern-generate pattern (current-output-port)) +; (displayln (pattern-count pattern)) (pattern-codegen pattern)