Check-in [a8011c42a0]
Not logged in

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:updated irregex
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: a8011c42a0fd1c40ecd224f8f8807f163edb80cb
User & Date: aldo 2024-09-27 18:19:17
Context
2024-09-27
18:29
merge 7164164ed6 check-in: 6834d66b60 user: aldo tags: trunk
18:19
updated irregex check-in: a8011c42a0 user: aldo tags: trunk
2019-04-11
18:15
add sql field to sqlite3 statement check-in: 538cf8286e user: aldo tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to irregex.sls.

53
54
55
56
57
58
59

60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
..
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103












104
105
106
107
108
109
110
...
394
395
396
397
398
399
400









401
402
403
404
405
406
407
408
...
410
411
412
413
414
415
416




417

418
419
420
421
422
423
424
...
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
...
886
887
888
889
890
891
892


893
894
895
896
897
898
899
900
...
929
930
931
932
933
934
935
936


937
938
939
940
941
942
943


944
945
946
947
948
949
950
951
952
953
....
1009
1010
1011
1012
1013
1014
1015
1016

1017
1018
1019
1020
1021
1022
1023
....
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414

1415
1416
1417
1418
1419
1420
1421
1422
....
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
....
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654












1655

1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
....
2194
2195
2196
2197
2198
2199
2200





2201
2202
2203
2204
2205

2206
2207
2208
2209
2210
2211
2212
2213
....
2241
2242
2243
2244
2245
2246
2247

2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
....
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
....
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
....
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
....
3150
3151
3152
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
....
3184
3185
3186
3187
3188
3189
3190






3191
3192
3193
3194






3195
3196
3197
3198
3199
3200
3201
....
3460
3461
3462
3463
3464
3465
3466
3467
3468
3469


3470
3471
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
3485
....
3845
3846
3847
3848
3849
3850
3851
3852
3853
3854
3855
3856
3857
3858
3859
3860
3861
3862
3863
3864
3865
3866
3867
3868
3869
3870
3871
3872
3873
3874
3875
3876
3877
3878
3879
3880
3881
3882
....
3900
3901
3902
3903
3904
3905
3906
3907
3908
3909
3910
3911
3912
3913
3914
3915
3916
3917
....
3929
3930
3931
3932
3933
3934
3935
3936
3937

3938
3939
3940
3941
3942
3943
3944
3945
....
3992
3993
3994
3995
3996
3997
3998

3999
4000

4001
4002
4003
4004
4005












4006
4007
4008
    irregex-lengths
    irregex-names
    irregex-num-submatches
    irregex-extract
    irregex-split
    sre->cset)
  (import 

    (except (rnrs) error find filter remove)
    (rnrs r5rs)
    (rnrs mutable-pairs)
    (rnrs mutable-strings)
    (only (chezscheme) include))

  ;; definition from irregex
  (define (error msg . args)
    (display msg)
    (for-each (lambda (x) (display " ") (write x)) args)
    (newline)
    (0))
  ;;;; irregex.scm -- IrRegular Expressions
;;
;; Copyright (c) 2005-2015 Alex Shinn.  All rights reserved.
;; BSD-style license: http://synthcode.com/license.txt

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; At this moment there was a loud ring at the bell, and I could
;; hear Mrs. Hudson, our landlady, raising her voice in a wail of
;; expostulation and dismay.
;;
................................................................................
;; "No, it's not quite so bad as that.  It is the unofficial
;; force, -- the Baker Street irregulars."

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;; Notes
;;
;; This code should not require any porting - it should work out of
;; the box in any R[45]RS Scheme implementation.  Slight modifications
;; are needed for R6RS (a separate R6RS-compatible version is included
;; in the distribution as irregex-r6rs.scm).
;;
;; The goal of portability makes this code a little clumsy and
;; inefficient.  Future versions will include both cleanup and
;; performance tuning, but you can only go so far while staying
;; portable.  AND-LET*, SRFI-9 records and custom macros would've been
;; nice.

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;; History












;; 0.9.4: 2015/12/14 - performance improvement for {n,m} matches
;; 0.9.3: 2014/07/01 - R7RS library
;; 0.9.2: 2012/11/29 - fixed a bug in -fold on conditional bos patterns
;; 0.9.1: 2012/11/27 - various accumulated bugfixes
;; 0.9.0: 2012/06/03 - Using tags for match extraction from Peter Bex.
;; 0.8.3: 2011/12/18 - various accumulated bugfixes
;; 0.8.2: 2010/08/28 - (...)? submatch extraction fix and alternate
................................................................................
;; For look-behind searches, wrap an existing chunker such that it
;; returns the same results but ends at a given point.
(define (wrap-end-chunker cnk src i)
  (make-irregex-chunker
   (lambda (x) (and (not (eq? x src)) ((chunker-get-next cnk) x)))
   (chunker-get-str cnk)
   (chunker-get-start cnk)









   (lambda (x) (if (eq? x src) i ((chunker-get-end cnk) x)))
   (chunker-get-substring cnk)
   (chunker-get-subchunk cnk)))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;; String Utilities

;; Unicode version (skip surrogates)
................................................................................
  `(/ ,(integer->char 0) ,(integer->char #xD7FF)
      ,(integer->char #xE000) ,(integer->char #x10FFFF)))

;; ASCII version, offset to not assume 0-255
;; (define *all-chars* `(/ ,(integer->char (- (char->integer #\space) 32)) ,(integer->char (+ (char->integer #\space) 223))))

;; set to #f to ignore even an explicit request for utf8 handling




(define *allow-utf8-mode?* #t)


;; (define *named-char-properties* '())

(define (string-scan-char str c . o)
  (let ((end (string-length str)))
    (let scan ((i (if (pair? o) (car o) 0)))
      (cond ((= i end) #f)
................................................................................
                (j (if at? (+ i 2) (+ i 1))))
           (read j (lambda (sexp j) (k (list u sexp) j)))))
        ((#\")
         (let scan ((from (+ i 1)) (i (+ i 1)) (res '()))
           (define (collect)
             (if (= from i) res (cons (substring str from i) res)))
           (if (>= i end)
               (error "unterminated string in embeded SRE" str)
               (case (string-ref str i)
                 ((#\") (k (string-cat-reverse (collect)) (+ i 1)))
                 ((#\\) (scan (+ i 1) (+ i 2) (collect)))
                 (else (scan from (+ i 1) res))))))
        ((#\#)
         (case (string-ref str (+ i 1))
           ((#\;)
................................................................................
                             (error "unterminated (*'...) SRE escape" str)
                             (lp (+ j 1) (+ j 1) flags (cons sre (collect)) st))))
                     (error "bad regexp syntax: (*FOO) not supported" str)))
                (else                   ;; (?...) case
                 (case (string-ref str (+ i 2))
                   ((#\#)
                    (let ((j (string-scan-char str #\) (+ i 3))))


                      (lp (+ j i) (+ j 1) flags (collect) st)))
                   ((#\:)
                    (lp (+ i 3) (+ i 3) (flag-clear flags ~save?) '() (save)))
                   ((#\=)
                    (lp (+ i 3) (+ i 3) (flag-clear flags ~save?)
                        '(look-ahead) (save)))
                   ((#\!)
                    (lp (+ i 3) (+ i 3) (flag-clear flags ~save?)
................................................................................
                   ;;((#\R) ; recursion
                   ;; )
                   ((#\()
                    (cond
                     ((>= (+ i 3) end)
                      (error "unterminated parenthesis in regexp" str))
                     ((char-numeric? (string-ref str (+ i 3)))
                      (let* ((j (string-scan-char str #\) (+ i 3)))


                             (n (string->number (substring str (+ i 3) j))))
                        (if (not n)
                            (error "invalid conditional reference" str)
                            (lp (+ j 1) (+ j 1) (flag-clear flags ~save?)
                                `(,n if) (save)))))
                     ((char-alphabetic? (string-ref str (+ i 3)))
                      (let* ((j (string-scan-char str #\) (+ i 3)))


                             (s (string->symbol (substring str (+ i 3) j))))
                        (lp (+ j 1) (+ j 1) (flag-clear flags ~save?)
                            `(,s if) (save))))
                     (else
                      (lp (+ i 2) (+ i 2) (flag-clear flags ~save?)
                          '(if) (save)))))
                   ((#\{)
                    (error "unsupported Perl-style cluster" str))
                   (else
                    (let ((old-flags flags))
................................................................................
                 (let ((res (collect/single)))
                   (cond
                    ((null? res)
                     (error "{ can't follow empty pattern"))
                    (else
                     (let* ((x (car res))
                            (tail (cdr res))
                            (j (string-scan-char str #\} (+ i 1)))

                            (s2 (string-split-char (substring str (+ i 1) j)
                                                   #\,))
                            (n (string->number (car s2)))
                            (m (and (pair? (cdr s2))
                                    (string->number (cadr s2)))))
                       (cond
                        ((or (not n)
................................................................................
  (let ((lo-ls (char->utf8-list lo))
        (hi-ls (char->utf8-list hi)))
    (if (not (= (length lo-ls) (length hi-ls)))
        (sre-alternate (list (unicode-range-climb-digits lo-ls hi-ls)
                             (unicode-range-up-to hi-ls)))
        (let lp ((lo-ls lo-ls) (hi-ls hi-ls))
          (cond
           ((null? lo-ls)
            '())
           ((= (car lo-ls) (car hi-ls))
            (sre-sequence
             (list (integer->char (car lo-ls))

                   (lp (cdr lo-ls) (cdr hi-ls)))))
           ((= (+ (car lo-ls) 1) (car hi-ls))
            (sre-alternate (list (unicode-range-up-from lo-ls)
                                 (unicode-range-up-to hi-ls))))
           (else
            (sre-alternate (list (unicode-range-up-from lo-ls)
                                 (unicode-range-middle lo-ls hi-ls)
                                 (unicode-range-up-to hi-ls)))))))))
................................................................................
           ((any) '(* any))
           ((nonl) '(* nonl))
           (else (cons '* (map rec (cdr sre))))))
        (else
         (cons (car sre) (map rec (cdr sre))))))
     (else
      (case sre
        ((any) 'utf8-any)
        ((nonl) 'utf8-nonl)
        (else
         (if (and utf8? (char? sre) (high-char? sre))
             (sre-sequence (map integer->char (char->utf8-list sre)))
             sre)))))))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;; Compilation
................................................................................
  (and (pair? sre)
       (or (memq (car sre) '(* +))
           (and (memq (car sre) '($ submatch => submatch-named seq :))
                (pair? (cdr sre))
                (null? (cddr sre))
                (sre-repeater? (cadr sre))))))

(define (sre-searcher? sre)
  (if (pair? sre)
      (case (car sre)
        ((* +) (sre-any? (sre-sequence (cdr sre))))
        ((seq : $ submatch => submatch-named)
         (and (pair? (cdr sre)) (sre-searcher? (cadr sre))))
        ((or) (every sre-searcher? (cdr sre)))
        (else #f))
      (eq? 'bos sre)))













(define (sre-consumer? sre)

  (if (pair? sre)
      (case (car sre)
        ((* +) (sre-any? (sre-sequence (cdr sre))))
        ((seq : $ submatch => submatch-named)
         (and (pair? (cdr sre)) (sre-consumer? (last sre))))
        ((or) (every sre-consumer? (cdr sre)))
        (else #f))
      (eq? 'eos sre)))

(define (sre-has-submatches? sre)
  (and (pair? sre)
       (or (memq (car sre) '($ submatch => submatch-named))
           (if (eq? 'posix-string (car sre))
               (sre-has-submatches? (string->sre (cadr sre)))
               (any sre-has-submatches? (cdr sre))))))
................................................................................
                      (cond ((and finalizer (not new-finalizer))
                             (finalize! finalizer memory matches)))
                      (for-each (lambda (s)
                                  (let ((slot (vector-ref memory (cdr s)))
                                        (chunk&position (cons src (+ i 1))))
                                    (vector-set! slot (car s) chunk&position)))
                                (cdr cmds))





                      (for-each (lambda (c)
                                  (let* ((tag (vector-ref c 0))
                                         (ss (vector-ref memory (vector-ref c 1)))
                                         (ds (vector-ref memory (vector-ref c 2))))
                                    (vector-set! ds tag (vector-ref ss tag))))

                                (car cmds)))))
                  (if new-finalizer
                      (lp2 (+ i 1) next src (+ i 1) new-finalizer)
                      (lp2 (+ i 1) next res-src res-index #f))))
               (res-src
                (cond
                 (index
                  (irregex-match-end-chunk-set! matches index res-src)
................................................................................
    (punctuation . (or #\! #\" #\# #\% #\& #\' #\( #\) #\* #\, #\- #\.
                       #\/ #\: #\; #\? #\@ #\[ #\\ #\] #\_ #\{ #\}))
    (punct . punctuation)
    (graphic
     . (or alphanumeric punctuation #\$ #\+ #\< #\= #\> #\^ #\` #\| #\~))
    (graph . graphic)
    (blank . (or #\space ,(integer->char (- (char->integer #\space) 23))))

    (whitespace . (or blank #\newline))
    (space . whitespace)
    (white . whitespace)
    (printing or graphic whitespace)
    (print . printing)

    ;; XXXX we assume a (possibly shifted) ASCII-based ordering
    (control . (/ ,(integer->char (- (char->integer #\space) 32))
                  ,(integer->char (- (char->integer #\space) 1))))
    (cntrl . control)
    (hex-digit . (or numeric (/ #\a #\f #\A #\F)))
................................................................................
    (ipv6-address . (seq (** 0 4 hex-digit)
                         (** 1 7 #\: (? #\:) (** 0 4 hex-digit))))
    (ip-address . (or ipv4-address ipv6-address))
    (domain-atom . (+ (or alphanumeric #\_ #\-)))
    (domain . (seq domain-atom (+ #\. domain-atom)))
    ;; XXXX now anything can be a top-level domain, but this is still handy
    (top-level-domain . (w/nocase (or "arpa" "com" "gov" "mil" "net" "org"
                                      "aero" "biz" "coop" "info" "museum"
                                      "name" "pro" (= 2 alpha))))
    (domain/common . (seq (+ domain-atom #\.) top-level-domain))
    ;;(email-local-part . (seq (+ (or (~ #\") string))))
    (email-local-part . (+ (or alphanumeric #\_ #\- #\. #\+)))
    (email . (seq email-local-part #\@ domain))
    (url-char . (or alnum #\_ #\- #\+ #\\ #\= #\~ #\. #\, #\& #\;
                    (seq "%" hex-digit hex-digit)))
    (url-final-char . (or alnum #\_ #\- #\+ #\\ #\= #\~ #\&
                          (seq "%" hex-digit hex-digit)))
    (http-url . (w/nocase
                 "http" (? "s") "://"
                 (or domain/common ipv4-address) ;; (seq "[" ipv6-address "]")
                 (? ":" (+ numeric)) ;; port
                 ;; path
                 (? "/" (* (or url-char "/"))
                    (? "?" (* url-char))                      ;; query
                    (? "#" (? (* url-char) url-final-char)) ;; fragment
                    )))

................................................................................
                                      next))
                               (a (and b
                                       (lp (list (cadar ls))
                                           (new-state-number (max b next))
                                           flags
                                           next))))
                          (and a
                               (let ((c (add-state! (new-state-number a)
                                                    '())))
                                 (nfa-add-epsilon! buf c a #f)
                                 (nfa-add-epsilon! buf c b #f)
                                 c)))))))
                ((?)
                 (let ((next (lp (cdr ls) n flags next)))
                   ;; insert an epsilon transition directly to next
................................................................................
;; closure, but has different tag value mappings.
;; If found, calculate reordering commands so we can map the closure
;; to that state instead of adding a new DFA state.
;; This is completely handwaved away in Laurikari's paper (it basically
;; says "insert reordering algorithm here"), so this code was constructed
;; after some experimentation.  In other words, bugs be here.
(define (find-reorder-commands-internal nfa closure dfa-states)
  (let ((num-states (nfa-num-states nfa))
        (num-tags (nfa-num-tags nfa))
        (closure-summary (mst-mappings-summary closure)))
    (let lp ((dfa-states dfa-states))
      (if (null? dfa-states)
          #f
          (if (not (mst-same-states? (caar dfa-states) closure))
              (lp (cdr dfa-states))
              (let lp2 ((state-summary (mst-mappings-summary (caar dfa-states)))
................................................................................
                 (next cnk init src str i end matches
                       (lambda () (body cnk init src str i end matches fail))))))
            ((*)
             (cond
              ((sre-empty? (sre-sequence (cdr sre)))
               (error "invalid sre: empty *" sre))
              (else
               (letrec
                   ((body
                     (lp (sre-sequence (cdr sre))
                         n
                         flags
                         (lambda (cnk init src str i end matches fail)
                           (body cnk init src str i end matches
                                 (lambda ()
                                   (next cnk init src str i end matches fail)
                                   ))))))
                 (lambda (cnk init src str i end matches fail)
                   (body cnk init src str i end matches
                         (lambda ()
                           (next cnk init src str i end matches fail))))))))
            ((*?)
             (cond
              ((sre-empty? (sre-sequence (cdr sre)))
................................................................................
                                   (body cnk init src str i end matches fail)
                                   ))))))
                 (lambda (cnk init src str i end matches fail)
                   (next cnk init src str i end matches
                         (lambda ()
                           (body cnk init src str i end matches fail))))))))
            ((+)






             (lp (sre-sequence (cdr sre))
                 n
                 flags
                 (rec (list '* (sre-sequence (cdr sre))))))






            ((=)
             (rec `(** ,(cadr sre) ,(cadr sre) ,@(cddr sre))))
            ((>=)
             (rec `(** ,(cadr sre) #f ,@(cddr sre))))
            ((**)
             (cond
              ((or (and (number? (cadr sre))
................................................................................
        ((bos)
         (lambda (cnk init src str i end matches fail)
           (if (and (eq? src (car init)) (eqv? i (cdr init)))
               (next cnk init src str i end matches fail)
               (fail))))
        ((bol)
         (lambda (cnk init src str i end matches fail)
           (if (or (and (eq? src (car init)) (eqv? i (cdr init)))
                   (and (> i ((chunker-get-start cnk) src))
                        (eqv? #\newline (string-ref str (- i 1)))))


               (next cnk init src str i end matches fail)
               (fail))))
        ((bow)
         (lambda (cnk init src str i end matches fail)
           (if (and (or (if (> i ((chunker-get-start cnk) src))
                            (not (char-alphanumeric? (string-ref str (- i 1))))
                            (let ((ch (chunker-prev-char cnk src end)))
                              (and ch (not (char-alphanumeric? ch)))))
                        (and (eq? src (car init)) (eqv? i (cdr init))))
                    (if (< i end)
                        (char-alphanumeric? (string-ref str i))
                        (let ((next ((chunker-get-next cnk) src)))
                          (and next
                               (char-alphanumeric?
                                (string-ref ((chunker-get-str cnk) next)
                                            ((chunker-get-start cnk) next)))))))
................................................................................
         (init-src (list str start end))
         (init (cons init-src start)))
    (if (not (and (integer? start) (exact? start)))
        (error "irregex-fold: not an exact integer" start))
    (if (not (and (integer? end) (exact? end)))
        (error "irregex-fold: not an exact integer" end))
    (irregex-match-chunker-set! matches irregex-basic-string-chunker)
    (let lp ((src init-src) (i start) (acc knil))
      (if (>= i end)
          (finish i acc)
          (let ((m (irregex-search/matches
                    irx
                    irregex-basic-string-chunker
                    init
                    src
                    i
                    matches)))
            (if (not m)
                (finish i acc)
                (let ((j (%irregex-match-end-index m 0)))
                  (if (= j i)
                      ;; skip one char forward if we match the empty string
                      (lp (list str (+ j 1) end) (+ j 1) acc)
                      (let ((acc (kons i m acc)))
                        (irregex-reset-matches! matches)
                        ;; no need to continue looping if this is a
                        ;; searcher - it's already consumed the only
                        ;; available match
                        (if (flag-set? (irregex-flags irx) ~searcher?)
                            (finish j acc)
                            (lp (list str j end) j acc)))))))))))

(define (irregex-fold irx kons . args)
  (if (not (procedure? kons)) (error "irregex-fold: not a procedure" kons))
  (let ((kons2 (lambda (i m acc) (kons i (irregex-copy-matches m) acc))))
    (apply irregex-fold/fast irx kons2 args)))

(define (irregex-fold/chunked/fast irx kons knil cnk start . o)
................................................................................
                  (if (and (eq? end-src start) (= end-index i))
                      (if (>= end-index ((chunker-get-end cnk) end-src ))
                          (let ((next ((chunker-get-next cnk) end-src)))
                            (lp next ((chunker-get-start cnk) next) acc))
                          (lp end-src (+ end-index 1) acc))
                      (let ((acc (kons start i m acc)))
                        (irregex-reset-matches! matches)
                        ;; no need to continue looping if this is a
                        ;; searcher - it's already consumed the only
                        ;; available match
                        (if (flag-set? (irregex-flags irx) ~searcher?)
                            (finish end-src end-index acc)
                            (lp end-src end-index acc)))))))))))

(define (irregex-fold/chunked irx kons . args)
  (if (not (procedure? kons)) (error "irregex-fold/chunked: not a procedure" kons))
  (let ((kons2 (lambda (s i m acc) (kons s i (irregex-copy-matches m) acc))))
    (apply irregex-fold/chunked/fast irx kons2 args)))
................................................................................

(define (irregex-replace/all irx str . o)
  (if (not (string? str)) (error "irregex-replace/all: not a string" str))
  (irregex-fold/fast
   irx
   (lambda (i m acc)
     (let ((m-start (%irregex-match-start-index m 0)))
       (append (irregex-apply-match m o)
               (if (>= i m-start)

                   acc
                   (cons (substring str i m-start) acc)))))
   '()
   str
   (lambda (i acc)
     (let ((end (string-length str)))
       (string-cat-reverse (if (>= i end)
                               acc
................................................................................
(define (irregex-split irx str . o)
  (if (not (string? str)) (error "irregex-split: not a string" str))
  (let ((start (if (pair? o) (car o) 0))
        (end (if (and (pair? o) (pair? (cdr o))) (cadr o) (string-length str))))
    (irregex-fold/fast
     irx
     (lambda (i m a)

       (if (= i (%irregex-match-start-index m 0))
           a

           (cons (substring str i (%irregex-match-start-index m 0)) a)))
     '()
     str
     (lambda (i a)
       (reverse (if (= i end) a (cons (substring str i end) a))))












     start
     end)))
)







>












|

|







 







|











>
>
>
>
>
>
>
>
>
>
>
>







 







>
>
>
>
>
>
>
>
>
|







 







>
>
>
>
|
>







 







|







 







>
>
|







 







|
>
>
|
|
|
|
|

|
>
>
|
|
|







 







|
>







 







<
<


|
>
|







 







|
|







 







|


<

|
|



>
>
>
>
>
>
>
>
>
>
>
>

>
|
|
|
|
|
|
|
<







 







>
>
>
>
>
|
|
|
|
|
>
|







 







>
|


|







 







|
|










|







 







|







 







<
|







 







<
<
|
<
<
<
<
<
<
<







 







>
>
>
>
>
>
|
|
|
<
>
>
>
>
>
>







 







<
|
|
>
>




|
|
|
|
<







 







|

|








|
|
|
|
|
|
|
|
|
|
|
|
|







 







<
<
<
|







 







<
|
>
|







 







>
|
|
>
|



|
>
>
>
>
>
>
>
>
>
>
>
>



53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
..
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
...
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
...
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
...
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
...
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
...
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
....
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
....
1437
1438
1439
1440
1441
1442
1443


1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
....
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
....
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680

1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707

1708
1709
1710
1711
1712
1713
1714
....
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
....
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
....
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
....
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
....
3081
3082
3083
3084
3085
3086
3087

3088
3089
3090
3091
3092
3093
3094
3095
....
3200
3201
3202
3203
3204
3205
3206


3207







3208
3209
3210
3211
3212
3213
3214
....
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240

3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
....
3512
3513
3514
3515
3516
3517
3518

3519
3520
3521
3522
3523
3524
3525
3526
3527
3528
3529
3530

3531
3532
3533
3534
3535
3536
3537
....
3897
3898
3899
3900
3901
3902
3903
3904
3905
3906
3907
3908
3909
3910
3911
3912
3913
3914
3915
3916
3917
3918
3919
3920
3921
3922
3923
3924
3925
3926
3927
3928
3929
3930
3931
3932
3933
3934
....
3952
3953
3954
3955
3956
3957
3958



3959
3960
3961
3962
3963
3964
3965
3966
....
3978
3979
3980
3981
3982
3983
3984

3985
3986
3987
3988
3989
3990
3991
3992
3993
3994
....
4041
4042
4043
4044
4045
4046
4047
4048
4049
4050
4051
4052
4053
4054
4055
4056
4057
4058
4059
4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
    irregex-lengths
    irregex-names
    irregex-num-submatches
    irregex-extract
    irregex-split
    sre->cset)
  (import 
    (srfi s0 cond-expand)
    (except (rnrs) error find filter remove)
    (rnrs r5rs)
    (rnrs mutable-pairs)
    (rnrs mutable-strings)
    (only (chezscheme) include))

  ;; definition from irregex
  (define (error msg . args)
    (display msg)
    (for-each (lambda (x) (display " ") (write x)) args)
    (newline)
    (0))
;;;; irregex.scm -- IrRegular Expressions
;;
;; Copyright (c) 2005-2024 Alex Shinn.  All rights reserved.
;; BSD-style license: http://synthcode.com/license.txt

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; At this moment there was a loud ring at the bell, and I could
;; hear Mrs. Hudson, our landlady, raising her voice in a wail of
;; expostulation and dismay.
;;
................................................................................
;; "No, it's not quite so bad as that.  It is the unofficial
;; force, -- the Baker Street irregulars."

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;; Notes
;;
;; This code should not require any porting - it should work out of
;; the box in any R[457]RS Scheme implementation.  Slight modifications
;; are needed for R6RS (a separate R6RS-compatible version is included
;; in the distribution as irregex-r6rs.scm).
;;
;; The goal of portability makes this code a little clumsy and
;; inefficient.  Future versions will include both cleanup and
;; performance tuning, but you can only go so far while staying
;; portable.  AND-LET*, SRFI-9 records and custom macros would've been
;; nice.

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;; History
;; 0.9.11: 2024/02/23 - Guile test and packaging support from Tomas Volf.
;; 0.9.10: 2021/07/06 - fixes for submatches under kleene star, empty seqs
;;                     in alternations, and bol in folds for backtracking
;;                     matcher (thanks John Clements and snan for reporting
;;                     and Peter Bex for fixing)
;; 0.9.9: 2021/05/14 - more comprehensive fix for repeated empty matches
;; 0.9.8: 2020/07/13 - fix irregex-replace/all with look-behind patterns
;; 0.9.7: 2019/12/31 - more intuitive handling of empty matches in -fold,
;;                     -replace and -split
;; 0.9.6: 2016/12/05 - fixed exponential memory use of + in compilation
;;                     of backtracking matcher (CVE-2016-9954).
;; 0.9.5: 2016/09/10 - fixed a bug in irregex-fold handling of bow
;; 0.9.4: 2015/12/14 - performance improvement for {n,m} matches
;; 0.9.3: 2014/07/01 - R7RS library
;; 0.9.2: 2012/11/29 - fixed a bug in -fold on conditional bos patterns
;; 0.9.1: 2012/11/27 - various accumulated bugfixes
;; 0.9.0: 2012/06/03 - Using tags for match extraction from Peter Bex.
;; 0.8.3: 2011/12/18 - various accumulated bugfixes
;; 0.8.2: 2010/08/28 - (...)? submatch extraction fix and alternate
................................................................................
;; For look-behind searches, wrap an existing chunker such that it
;; returns the same results but ends at a given point.
(define (wrap-end-chunker cnk src i)
  (make-irregex-chunker
   (lambda (x) (and (not (eq? x src)) ((chunker-get-next cnk) x)))
   (chunker-get-str cnk)
   (chunker-get-start cnk)
   (lambda (x)
     ;; TODO: this is a hack workaround for the fact that we don't
     ;; have either a notion of chunk equivalence or chunk truncation,
     ;; until which time (neg-)look-behind in a fold won't work on
     ;; non-basic chunks.
     (if (or (eq? x src)
             (and (not ((chunker-get-next cnk) x))
                  (not ((chunker-get-next cnk) src))))
         i
         ((chunker-get-end cnk) x)))
   (chunker-get-substring cnk)
   (chunker-get-subchunk cnk)))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;; String Utilities

;; Unicode version (skip surrogates)
................................................................................
  `(/ ,(integer->char 0) ,(integer->char #xD7FF)
      ,(integer->char #xE000) ,(integer->char #x10FFFF)))

;; ASCII version, offset to not assume 0-255
;; (define *all-chars* `(/ ,(integer->char (- (char->integer #\space) 32)) ,(integer->char (+ (char->integer #\space) 223))))

;; set to #f to ignore even an explicit request for utf8 handling
;; The utf8-mode is undesired on any implementation with native unicode support.
;; It is a workaround for those that treat strings as a raw byte sequences, and
;; does not work well otherwise.  So disable it on implementations known to
;; handle unicode natively.
(define *allow-utf8-mode?* (cond-expand ((and chicken (not full-unicode)) #t)
                                        (else #f)))

;; (define *named-char-properties* '())

(define (string-scan-char str c . o)
  (let ((end (string-length str)))
    (let scan ((i (if (pair? o) (car o) 0)))
      (cond ((= i end) #f)
................................................................................
                (j (if at? (+ i 2) (+ i 1))))
           (read j (lambda (sexp j) (k (list u sexp) j)))))
        ((#\")
         (let scan ((from (+ i 1)) (i (+ i 1)) (res '()))
           (define (collect)
             (if (= from i) res (cons (substring str from i) res)))
           (if (>= i end)
               (error "unterminated string in embedded SRE" str)
               (case (string-ref str i)
                 ((#\") (k (string-cat-reverse (collect)) (+ i 1)))
                 ((#\\) (scan (+ i 1) (+ i 2) (collect)))
                 (else (scan from (+ i 1) res))))))
        ((#\#)
         (case (string-ref str (+ i 1))
           ((#\;)
................................................................................
                             (error "unterminated (*'...) SRE escape" str)
                             (lp (+ j 1) (+ j 1) flags (cons sre (collect)) st))))
                     (error "bad regexp syntax: (*FOO) not supported" str)))
                (else                   ;; (?...) case
                 (case (string-ref str (+ i 2))
                   ((#\#)
                    (let ((j (string-scan-char str #\) (+ i 3))))
                      (if (not j)
                          (error "missing ) after #" str)
                          (lp (+ j i) (+ j 1) flags (collect) st))))
                   ((#\:)
                    (lp (+ i 3) (+ i 3) (flag-clear flags ~save?) '() (save)))
                   ((#\=)
                    (lp (+ i 3) (+ i 3) (flag-clear flags ~save?)
                        '(look-ahead) (save)))
                   ((#\!)
                    (lp (+ i 3) (+ i 3) (flag-clear flags ~save?)
................................................................................
                   ;;((#\R) ; recursion
                   ;; )
                   ((#\()
                    (cond
                     ((>= (+ i 3) end)
                      (error "unterminated parenthesis in regexp" str))
                     ((char-numeric? (string-ref str (+ i 3)))
                      (let ((j (string-scan-char str #\) (+ i 3))))
                        (if (not j)
                            (error "missing closing parenthesis" str i)
                            (let ((n (string->number (substring str (+ i 3) j))))
                              (if (not n)
                                  (error "invalid conditional reference" str)
                                  (lp (+ j 1) (+ j 1) (flag-clear flags ~save?)
                                      `(,n if) (save)))))))
                     ((char-alphabetic? (string-ref str (+ i 3)))
                      (let ((j (string-scan-char str #\) (+ i 3))))
                        (if (not j)
                            (error "missing closing parenthesis" str i)
                            (let ((s (string->symbol (substring str (+ i 3) j))))
                              (lp (+ j 1) (+ j 1) (flag-clear flags ~save?)
                                  `(,s if) (save))))))
                     (else
                      (lp (+ i 2) (+ i 2) (flag-clear flags ~save?)
                          '(if) (save)))))
                   ((#\{)
                    (error "unsupported Perl-style cluster" str))
                   (else
                    (let ((old-flags flags))
................................................................................
                 (let ((res (collect/single)))
                   (cond
                    ((null? res)
                     (error "{ can't follow empty pattern"))
                    (else
                     (let* ((x (car res))
                            (tail (cdr res))
                            (j (or (string-scan-char str #\} (+ i 1))
                                   (error "missing closing }" str i)))
                            (s2 (string-split-char (substring str (+ i 1) j)
                                                   #\,))
                            (n (string->number (car s2)))
                            (m (and (pair? (cdr s2))
                                    (string->number (cadr s2)))))
                       (cond
                        ((or (not n)
................................................................................
  (let ((lo-ls (char->utf8-list lo))
        (hi-ls (char->utf8-list hi)))
    (if (not (= (length lo-ls) (length hi-ls)))
        (sre-alternate (list (unicode-range-climb-digits lo-ls hi-ls)
                             (unicode-range-up-to hi-ls)))
        (let lp ((lo-ls lo-ls) (hi-ls hi-ls))
          (cond


           ((= (car lo-ls) (car hi-ls))
            (sre-sequence
              (cons (integer->char (car lo-ls))
                (if (null? (cdr lo-ls)) '()
                    (cons (lp (cdr lo-ls) (cdr hi-ls)) '())))))
           ((= (+ (car lo-ls) 1) (car hi-ls))
            (sre-alternate (list (unicode-range-up-from lo-ls)
                                 (unicode-range-up-to hi-ls))))
           (else
            (sre-alternate (list (unicode-range-up-from lo-ls)
                                 (unicode-range-middle lo-ls hi-ls)
                                 (unicode-range-up-to hi-ls)))))))))
................................................................................
           ((any) '(* any))
           ((nonl) '(* nonl))
           (else (cons '* (map rec (cdr sre))))))
        (else
         (cons (car sre) (map rec (cdr sre))))))
     (else
      (case sre
        ((any) (if utf8? 'utf8-any 'any))
        ((nonl) (if utf8? 'utf8-nonl 'nonl))
        (else
         (if (and utf8? (char? sre) (high-char? sre))
             (sre-sequence (map integer->char (char->utf8-list sre)))
             sre)))))))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;; Compilation
................................................................................
  (and (pair? sre)
       (or (memq (car sre) '(* +))
           (and (memq (car sre) '($ submatch => submatch-named seq :))
                (pair? (cdr sre))
                (null? (cddr sre))
                (sre-repeater? (cadr sre))))))

(define (sre-bos? sre)
  (if (pair? sre)
      (case (car sre)

        ((seq : $ submatch => submatch-named)
         (and (pair? (cdr sre)) (sre-bos? (cadr sre))))
        ((or) (every sre-bos? (cdr sre)))
        (else #f))
      (eq? 'bos sre)))

;; a searcher doesn't need explicit iteration to find the first match
(define (sre-searcher? sre)
  (or (sre-bos? sre)
      (and (pair? sre)
           (case (car sre)
             ((* +) (sre-any? (sre-sequence (cdr sre))))
             ((seq : $ submatch => submatch-named)
              (and (pair? (cdr sre)) (sre-searcher? (cadr sre))))
             ((or) (every sre-searcher? (cdr sre)))
             (else #f)))))

;; a consumer doesn't need to match more than once
(define (sre-consumer? sre)
  (or (sre-bos? sre)
      (and (pair? sre)
           (case (car sre)
             ((* +) (sre-any? (sre-sequence (cdr sre))))
             ((seq : $ submatch => submatch-named)
              (and (pair? (cdr sre)) (sre-consumer? (last sre))))
             ((or) (every sre-consumer? (cdr sre)))
             (else #f)))))


(define (sre-has-submatches? sre)
  (and (pair? sre)
       (or (memq (car sre) '($ submatch => submatch-named))
           (if (eq? 'posix-string (car sre))
               (sre-has-submatches? (string->sre (cadr sre)))
               (any sre-has-submatches? (cdr sre))))))
................................................................................
                      (cond ((and finalizer (not new-finalizer))
                             (finalize! finalizer memory matches)))
                      (for-each (lambda (s)
                                  (let ((slot (vector-ref memory (cdr s)))
                                        (chunk&position (cons src (+ i 1))))
                                    (vector-set! slot (car s) chunk&position)))
                                (cdr cmds))
                      ;; Reassigning commands may be in an order which
                      ;; causes memory cells to be clobbered before
                      ;; they're read out.  Make 2 passes to maintain
                      ;; old values by copying them into a closure.
                      (for-each (lambda (execute!) (execute!))
                                (map (lambda (c)
                                       (let* ((tag (vector-ref c 0))
                                              (ss (vector-ref memory (vector-ref c 1)))
                                              (ds (vector-ref memory (vector-ref c 2)))
                                              (value-from (vector-ref ss tag)))
                                         (lambda () (vector-set! ds tag value-from))))
                                     (car cmds))))))
                  (if new-finalizer
                      (lp2 (+ i 1) next src (+ i 1) new-finalizer)
                      (lp2 (+ i 1) next res-src res-index #f))))
               (res-src
                (cond
                 (index
                  (irregex-match-end-chunk-set! matches index res-src)
................................................................................
    (punctuation . (or #\! #\" #\# #\% #\& #\' #\( #\) #\* #\, #\- #\.
                       #\/ #\: #\; #\? #\@ #\[ #\\ #\] #\_ #\{ #\}))
    (punct . punctuation)
    (graphic
     . (or alphanumeric punctuation #\$ #\+ #\< #\= #\> #\^ #\` #\| #\~))
    (graph . graphic)
    (blank . (or #\space ,(integer->char (- (char->integer #\space) 23))))
    ;; 0B - vertical tab, 0C - form feed
    (whitespace . (or blank #\newline #\x0C #\return #\x0B))
    (space . whitespace)
    (white . whitespace)
    (printing . (or graphic whitespace))
    (print . printing)

    ;; XXXX we assume a (possibly shifted) ASCII-based ordering
    (control . (/ ,(integer->char (- (char->integer #\space) 32))
                  ,(integer->char (- (char->integer #\space) 1))))
    (cntrl . control)
    (hex-digit . (or numeric (/ #\a #\f #\A #\F)))
................................................................................
    (ipv6-address . (seq (** 0 4 hex-digit)
                         (** 1 7 #\: (? #\:) (** 0 4 hex-digit))))
    (ip-address . (or ipv4-address ipv6-address))
    (domain-atom . (+ (or alphanumeric #\_ #\-)))
    (domain . (seq domain-atom (+ #\. domain-atom)))
    ;; XXXX now anything can be a top-level domain, but this is still handy
    (top-level-domain . (w/nocase (or "arpa" "com" "gov" "mil" "net" "org"
                                      "edu" "aero" "biz" "coop" "info"
              "museum" "name" "pro" (= 2 alpha))))
    (domain/common . (seq (+ domain-atom #\.) top-level-domain))
    ;;(email-local-part . (seq (+ (or (~ #\") string))))
    (email-local-part . (+ (or alphanumeric #\_ #\- #\. #\+)))
    (email . (seq email-local-part #\@ domain))
    (url-char . (or alnum #\_ #\- #\+ #\\ #\= #\~ #\. #\, #\& #\;
                    (seq "%" hex-digit hex-digit)))
    (url-final-char . (or alnum #\_ #\- #\+ #\\ #\= #\~ #\&
                          (seq "%" hex-digit hex-digit)))
    (http-url . (w/nocase
                 "http" (? "s") "://"
                 (or domain ipv4-address) ;; (seq "[" ipv6-address "]")
                 (? ":" (+ numeric)) ;; port
                 ;; path
                 (? "/" (* (or url-char "/"))
                    (? "?" (* url-char))                      ;; query
                    (? "#" (? (* url-char) url-final-char)) ;; fragment
                    )))

................................................................................
                                      next))
                               (a (and b
                                       (lp (list (cadar ls))
                                           (new-state-number (max b next))
                                           flags
                                           next))))
                          (and a
                               (let ((c (add-state! (new-state-number (max a b))
                                                    '())))
                                 (nfa-add-epsilon! buf c a #f)
                                 (nfa-add-epsilon! buf c b #f)
                                 c)))))))
                ((?)
                 (let ((next (lp (cdr ls) n flags next)))
                   ;; insert an epsilon transition directly to next
................................................................................
;; closure, but has different tag value mappings.
;; If found, calculate reordering commands so we can map the closure
;; to that state instead of adding a new DFA state.
;; This is completely handwaved away in Laurikari's paper (it basically
;; says "insert reordering algorithm here"), so this code was constructed
;; after some experimentation.  In other words, bugs be here.
(define (find-reorder-commands-internal nfa closure dfa-states)

  (let ((num-tags (nfa-num-tags nfa))
        (closure-summary (mst-mappings-summary closure)))
    (let lp ((dfa-states dfa-states))
      (if (null? dfa-states)
          #f
          (if (not (mst-same-states? (caar dfa-states) closure))
              (lp (cdr dfa-states))
              (let lp2 ((state-summary (mst-mappings-summary (caar dfa-states)))
................................................................................
                 (next cnk init src str i end matches
                       (lambda () (body cnk init src str i end matches fail))))))
            ((*)
             (cond
              ((sre-empty? (sre-sequence (cdr sre)))
               (error "invalid sre: empty *" sre))
              (else


               (let ((body (rec (list '+ (sre-sequence (cdr sre))))))







                 (lambda (cnk init src str i end matches fail)
                   (body cnk init src str i end matches
                         (lambda ()
                           (next cnk init src str i end matches fail))))))))
            ((*?)
             (cond
              ((sre-empty? (sre-sequence (cdr sre)))
................................................................................
                                   (body cnk init src str i end matches fail)
                                   ))))))
                 (lambda (cnk init src str i end matches fail)
                   (next cnk init src str i end matches
                         (lambda ()
                           (body cnk init src str i end matches fail))))))))
            ((+)
             (cond
              ((sre-empty? (sre-sequence (cdr sre)))
               (error "invalid sre: empty +" sre))
              (else
               (letrec
                   ((body
                     (lp (sre-sequence (cdr sre))
                         n
                         flags

                         (lambda (cnk init src str i end matches fail)
                           (body cnk init src str i end matches
                                 (lambda ()
                                   (next cnk init src str i end matches fail)
                                   ))))))
                 body))))
            ((=)
             (rec `(** ,(cadr sre) ,(cadr sre) ,@(cddr sre))))
            ((>=)
             (rec `(** ,(cadr sre) #f ,@(cddr sre))))
            ((**)
             (cond
              ((or (and (number? (cadr sre))
................................................................................
        ((bos)
         (lambda (cnk init src str i end matches fail)
           (if (and (eq? src (car init)) (eqv? i (cdr init)))
               (next cnk init src str i end matches fail)
               (fail))))
        ((bol)
         (lambda (cnk init src str i end matches fail)

           (if (let ((ch (if (> i ((chunker-get-start cnk) src))
                             (string-ref str (- i 1))
                             (chunker-prev-char cnk init src))))
                 (or (not ch) (eqv? #\newline ch)))
               (next cnk init src str i end matches fail)
               (fail))))
        ((bow)
         (lambda (cnk init src str i end matches fail)
           (if (and (if (> i ((chunker-get-start cnk) src))
                        (not (char-alphanumeric? (string-ref str (- i 1))))
                        (let ((ch (chunker-prev-char cnk init src)))
                          (or (not ch) (not (char-alphanumeric? ch)))))

                    (if (< i end)
                        (char-alphanumeric? (string-ref str i))
                        (let ((next ((chunker-get-next cnk) src)))
                          (and next
                               (char-alphanumeric?
                                (string-ref ((chunker-get-str cnk) next)
                                            ((chunker-get-start cnk) next)))))))
................................................................................
         (init-src (list str start end))
         (init (cons init-src start)))
    (if (not (and (integer? start) (exact? start)))
        (error "irregex-fold: not an exact integer" start))
    (if (not (and (integer? end) (exact? end)))
        (error "irregex-fold: not an exact integer" end))
    (irregex-match-chunker-set! matches irregex-basic-string-chunker)
    (let lp ((src init-src) (from start) (i start) (acc knil))
      (if (>= i end)
          (finish from acc)
          (let ((m (irregex-search/matches
                    irx
                    irregex-basic-string-chunker
                    init
                    src
                    i
                    matches)))
            (if (not m)
                (finish from acc)
                (let ((j-start (%irregex-match-start-index m 0))
                      (j (%irregex-match-end-index m 0))
                      (acc (kons from m acc)))
                  (irregex-reset-matches! matches)
                  (cond
                   ((flag-set? (irregex-flags irx) ~consumer?)
                    (finish j acc))
                   ((= j j-start)
                    ;; skip one char forward if we match the empty string
                    (lp (list str j end) j (+ j 1) acc))
                   (else
                    (lp (list str j end) j j acc))))))))))

(define (irregex-fold irx kons . args)
  (if (not (procedure? kons)) (error "irregex-fold: not a procedure" kons))
  (let ((kons2 (lambda (i m acc) (kons i (irregex-copy-matches m) acc))))
    (apply irregex-fold/fast irx kons2 args)))

(define (irregex-fold/chunked/fast irx kons knil cnk start . o)
................................................................................
                  (if (and (eq? end-src start) (= end-index i))
                      (if (>= end-index ((chunker-get-end cnk) end-src ))
                          (let ((next ((chunker-get-next cnk) end-src)))
                            (lp next ((chunker-get-start cnk) next) acc))
                          (lp end-src (+ end-index 1) acc))
                      (let ((acc (kons start i m acc)))
                        (irregex-reset-matches! matches)



                        (if (flag-set? (irregex-flags irx) ~consumer?)
                            (finish end-src end-index acc)
                            (lp end-src end-index acc)))))))))))

(define (irregex-fold/chunked irx kons . args)
  (if (not (procedure? kons)) (error "irregex-fold/chunked: not a procedure" kons))
  (let ((kons2 (lambda (s i m acc) (kons s i (irregex-copy-matches m) acc))))
    (apply irregex-fold/chunked/fast irx kons2 args)))
................................................................................

(define (irregex-replace/all irx str . o)
  (if (not (string? str)) (error "irregex-replace/all: not a string" str))
  (irregex-fold/fast
   irx
   (lambda (i m acc)
     (let ((m-start (%irregex-match-start-index m 0)))

       (if (>= i m-start)
           (append (irregex-apply-match m o) acc)
           (append (irregex-apply-match m o)
                   (cons (substring str i m-start) acc)))))
   '()
   str
   (lambda (i acc)
     (let ((end (string-length str)))
       (string-cat-reverse (if (>= i end)
                               acc
................................................................................
(define (irregex-split irx str . o)
  (if (not (string? str)) (error "irregex-split: not a string" str))
  (let ((start (if (pair? o) (car o) 0))
        (end (if (and (pair? o) (pair? (cdr o))) (cadr o) (string-length str))))
    (irregex-fold/fast
     irx
     (lambda (i m a)
       (cond
        ((= i (%irregex-match-start-index m 0))
         a)
        (else
         (cons (substring str i (%irregex-match-start-index m 0)) a))))
     '()
     str
     (lambda (i a)
       (let lp ((ls (if (= i end) a (cons (substring str i end) a)))
                (res '())
                (was-char? #f))
         (cond
          ((null? ls) res)
          ((char? (car ls))
           (lp (cdr ls)
               (if (or was-char? (null? res))
                   (cons (string (car ls)) res)
                   (cons (string-append (string (car ls)) (car res))
                         (cdr res)))
               #t))
          (else (lp (cdr ls) (cons (car ls) res) #f)))))
     start
     end)))
)