Artifact
198ea9bf44843c480dfc8bae452762385b222176:
- File
irregex-utils.sls
— part of check-in
[c4f5a3bcd9]
at
2016-07-09 21:57:07
on branch trunk
— added gl, irregex, sdl2 - removed match.scm
(user:
ovenpasta@pizzahack.eu
size: 6692)
;;; Chez-Scheme library for Alex Shinn's Irregex
;;;
;;; Copyright (c) 2016 Federico Beffa <beffa@fbengineering.ch>
;;;
;;; Permission to use, copy, modify, and distribute this software for
;;; any purpose with or without fee is hereby granted, provided that the
;;; above copyright notice and this permission notice appear in all
;;; copies.
;;;
;;; THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
;;; WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
;;; WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
;;; AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
;;; DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA
;;; OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
;;; TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
;;; PERFORMANCE OF THIS SOFTWARE.
(library (irregex-utils)
(export
irregex-quote
irregex-opt
sre->string)
(import
(except (rnrs) error find filter remove)
(only (chezscheme) include get-output-string open-output-string)
(irregex)
(only (srfi :1) every))
;; definition from irregex
(define (error msg . args)
(display msg)
(for-each (lambda (x) (display " ") (write x)) args)
(newline)
(0))
;;;; irregex-utils.scm
;;
;; Copyright (c) 2010 Alex Shinn. All rights reserved.
;; BSD-style license: http://synthcode.com/license.txt
(define rx-special-chars
"\\|[](){}.*+?^$#")
(define (string-scan-char str c . o)
(let ((end (string-length str)))
(let scan ((i (if (pair? o) (car o) 0)))
(cond ((= i end) #f)
((eqv? c (string-ref str i)) i)
(else (scan (+ i 1)))))))
(define (irregex-quote str)
(list->string
(let loop ((ls (string->list str)) (res '()))
(if (null? ls)
(reverse res)
(let ((c (car ls)))
(if (string-scan-char rx-special-chars c)
(loop (cdr ls) (cons c (cons #\\ res)))
(loop (cdr ls) (cons c res))))))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define (irregex-opt ls)
(define (make-alt ls)
(cond ((null? (cdr ls)) (car ls))
((every char? ls) (list (list->string ls)))
(else (cons 'or ls))))
(define (make-seq ls)
(cond ((null? (cdr ls)) (car ls))
((every (lambda (x) (or (string? x) (char? x))) ls)
(apply string-append (map (lambda (x) (if (char? x) (string x) x)) ls)))
(else (cons 'seq ls))))
(cond
((null? ls) "")
((null? (cdr ls)) (car ls))
(else
(let ((chars (make-vector 256 '())))
(let lp1 ((ls ls) (empty? #f))
(if (null? ls)
(let lp2 ((i 0) (res '()))
(if (= i 256)
(let ((res (make-alt (reverse res))))
(if empty? `(? ,res) res))
(let ((c (integer->char i))
(opts (vector-ref chars i)))
(lp2 (+ i 1)
(cond
((null? opts) res)
((equal? opts '("")) `(,c ,@res))
(else `(,(make-seq (list c (irregex-opt opts)))
,@res)))))))
(let* ((str (car ls))
(len (string-length str)))
(if (zero? len)
(lp1 (cdr ls) #t)
(let ((i (char->integer (string-ref str 0))))
(vector-set!
chars
i
(cons (substring str 1 len) (vector-ref chars i)))
(lp1 (cdr ls) empty?))))))))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define (cset->string ls)
(let ((out (open-output-string)))
(let lp ((ls ls))
(cond
((pair? ls)
(cond
((pair? (car ls))
(display (irregex-quote (string (caar ls))) out)
(write-char #\- out)
(display (irregex-quote (string (cdar ls))) out))
(else (display (irregex-quote (string (car ls))) out)))
(lp (cdr ls)))))
(get-output-string out)))
(define (sre->string obj)
(let ((out (open-output-string)))
(let lp ((x obj))
(cond
((pair? x)
(case (car x)
((: seq)
(cond
((and (pair? (cddr x)) (pair? (cddr x)) (not (eq? x obj)))
(display "(?:" out) (for-each lp (cdr x)) (display ")" out))
(else (for-each lp (cdr x)))))
((submatch)
(display "(" out) (for-each lp (cdr x)) (display ")" out))
((submatch-named)
(display "(?<" out) (display (cadr x) out) (display ">" out)
(for-each lp (cddr x)) (display ")" out))
((or)
(display "(?:" out)
(lp (cadr x))
(for-each (lambda (x) (display "|" out) (lp x)) (cddr x))
(display ")" out))
((* + ? *? ??)
(cond
((pair? (cddr x))
(display "(?:" out) (for-each lp (cdr x)) (display ")" out))
(else (lp (cadr x))))
(display (car x) out))
((not)
(cond
((and (pair? (cadr x)) (eq? 'cset (caadr x)))
(display "[^" out)
(display (cset->string (cdadr x)) out)
(display "]" out))
(else (error "can't represent general 'not' in strings" x))))
((cset)
(display "[" out)
(display (cset->string (cdr x)) out)
(display "]" out))
((- & / ~)
(cond
((or (eq? #\~ (car x))
(and (eq? '- (car x)) (pair? (cdr x)) (eq? 'any (cadr x))))
(display "[^" out)
(display (cset->string (if (eq? #\~ (car x)) (cdr x) (cddr x))) out)
(display "]" out))
(else
(lp `(cset ,@(sre->cset x))))))
((w/case w/nocase)
(display "(?" out)
(if (eq? (car x) 'w/case) (display "-" out))
(display ":" out)
(for-each lp (cdr x))
(display ")" out))
(else
(if (string? (car x))
(lp `(cset ,@(string->list (car x))))
(error "unknown sre operator" x)))))
((symbol? x)
(case x
((bos bol) (display "^" out))
((eos eol) (display "$" out))
((any nonl) (display "." out))
(else (error "unknown sre symbol" x))))
((string? x)
(display (irregex-quote x) out))
((char? x)
(display (irregex-quote (string x)) out))
(else
(error "unknown sre pattern" x))))
(get-output-string out)))
)