| 1 |
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*- |
|---|
| 2 |
;;; $Header: /usr/local/cvsrep/cl-ppcre/util.lisp,v 1.46 2008/07/06 18:12:05 edi Exp $ |
|---|
| 3 |
|
|---|
| 4 |
;;; Utility functions and constants dealing with the character sets we |
|---|
| 5 |
;;; use to encode character classes |
|---|
| 6 |
|
|---|
| 7 |
;;; Copyright (c) 2002-2008, Dr. Edmund Weitz. All rights reserved. |
|---|
| 8 |
|
|---|
| 9 |
;;; Redistribution and use in source and binary forms, with or without |
|---|
| 10 |
;;; modification, are permitted provided that the following conditions |
|---|
| 11 |
;;; are met: |
|---|
| 12 |
|
|---|
| 13 |
;;; * Redistributions of source code must retain the above copyright |
|---|
| 14 |
;;; notice, this list of conditions and the following disclaimer. |
|---|
| 15 |
|
|---|
| 16 |
;;; * Redistributions in binary form must reproduce the above |
|---|
| 17 |
;;; copyright notice, this list of conditions and the following |
|---|
| 18 |
;;; disclaimer in the documentation and/or other materials |
|---|
| 19 |
;;; provided with the distribution. |
|---|
| 20 |
|
|---|
| 21 |
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED |
|---|
| 22 |
;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
|---|
| 23 |
;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|---|
| 24 |
;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY |
|---|
| 25 |
;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
|---|
| 26 |
;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE |
|---|
| 27 |
;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
|---|
| 28 |
;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, |
|---|
| 29 |
;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
|---|
| 30 |
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
|---|
| 31 |
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|---|
| 32 |
|
|---|
| 33 |
(in-package :cl-ppcre) |
|---|
| 34 |
|
|---|
| 35 |
(defmacro defconstant (name value &optional doc)
  "Wrapper around CL:DEFCONSTANT.  If NAME is already bound when the
expansion runs, its current value is reused instead of the value of
the form VALUE, so that the constant is never redefined with a fresh
object \(this is done to appease SBCL).  DOC, if supplied, is passed
on to CL:DEFCONSTANT as the documentation string."
  (let ((guarded-value `(if (boundp ',name)
                            (symbol-value ',name)
                            ,value)))
    (if doc
        `(cl:defconstant ,name ,guarded-value ,doc)
        `(cl:defconstant ,name ,guarded-value))))
|---|
| 39 |
|
|---|
| 40 |
#+:lispworks |
|---|
| 41 |
(eval-when (:compile-toplevel :load-toplevel :execute) |
|---|
| 42 |
(import 'lw:with-unique-names)) |
|---|
| 43 |
|
|---|
| 44 |
#-:lispworks
(defmacro with-unique-names ((&rest bindings) &body body)
  "Syntax: WITH-UNIQUE-NAMES ( { var | (var x) }* ) declaration* form*

Binds every VAR to a fresh, uninterned symbol and then evaluates the
forms of BODY as an implicit PROGN.  Each symbol is created by a call
to GENSYM whose argument is a prefix string: the name of VAR for the
short form, or the string denoted by X \(a symbol, a character, or a
string) for the long form.

X is inspected at macro expansion time and is not evaluated.  The
bindings are established by a plain LET, so they are lexical unless
BODY begins with SPECIAL declarations."
  ;; based on the reference implementation posted to comp.lang.lisp as
  ;; <cy3bshuf30f.fsf@ljosa.com> by Vebjorn Ljosa - see also
  ;; <http://www.cliki.net/Common%20Lisp%20Utilities>
  (labels ((prefix-string (x)
             ;; turns the X of a \(VAR X) binding into a literal string
             (etypecase x
               (string x)
               (character (string x))
               (symbol (symbol-name x))))
           (expand-binding (binding)
             ;; returns the LET binding for one element of BINDINGS
             (check-type binding (or cons symbol))
             (if (symbolp binding)
                 `(,binding (gensym ,(symbol-name binding)))
                 (destructuring-bind (var x) binding
                   (check-type var symbol)
                   `(,var (gensym ,(prefix-string x)))))))
    `(let ,(mapcar #'expand-binding bindings)
       ,@body)))
|---|
| 74 |
|
|---|
| 75 |
;; on LispWorks the expander of LW:REBINDING is installed under our own
;; name instead of defining the macro ourselves
#+:lispworks
(eval-when (:compile-toplevel :load-toplevel :execute)
  (setf (macro-function 'with-rebinding)
        (macro-function 'lw:rebinding)))

#-:lispworks
(defmacro with-rebinding (bindings &body body)
  "WITH-REBINDING ( { var | (var prefix) }* ) form*

Intended for use inside macro definitions.  The value each VAR has
when WITH-REBINDING is entered is saved, and VAR is then rebound to a
fresh, uninterned symbol for the duration of the forms.  The values of
the forms are spliced into a LET form which binds each of these fresh
symbols to the saved value of the corresponding VAR; this LET form is
the result.

The uninterned symbol is created as if by a call to GENSYM with the
string denoted by PREFIX - or, if PREFIX is not supplied, the string
denoted by VAR - as argument."
  ;; based on the reference implementation posted to comp.lang.lisp as
  ;; <cy3wv0fya0p.fsf@ljosa.com> by Vebjorn Ljosa - see also
  ;; <http://www.cliki.net/Common%20Lisp%20Utilities>
  (let* ((vars (mapcar #'(lambda (binding)
                           (if (consp binding) (car binding) binding))
                       bindings))
         ;; one temporary per VAR which holds the original value
         (names (mapcar #'(lambda (var)
                            (declare (ignore var))
                            (gensym))
                        vars))
         (renames (mapcar #'list names vars))
         (temps (mapcar #'(lambda (var name)
                            ``(,,var ,,name))
                        vars names)))
    `(let ,renames
       (with-unique-names ,bindings
         `(let (,,@temps)
            ,,@body)))))
|---|
| 107 |
|
|---|
| 108 |
(declaim (inline digit-char-p))
(defun digit-char-p (chr)
  "Returns true if the character CHR lies between #\\0 and #\\9
\(inclusive), i.e. if it is a decimal digit in the sense of Perl's
[\\d].  Note that this function shadows the standard Common Lisp
function CL:DIGIT-CHAR-P; it takes no radix argument and returns a
truth value, not the weight of the digit."
  (declare #.*standard-optimize-settings*)
  (and (char>= chr #\0)
       (char<= chr #\9)))
|---|
| 115 |
|
|---|
| 116 |
(declaim (inline word-char-p))
(defun word-char-p (chr)
  "Returns true if the character CHR is a \"word\" character in the
sense of Perl's [\\w]: either ALPHANUMERICP accepts it or it is the
underscore.  In the ASCII charset this amounts to a-z, A-Z, 0-9,
or _."
  (declare #.*standard-optimize-settings*)
  (cond ((alphanumericp chr))
        ((char= chr #\_))))
|---|
| 124 |
|
|---|
| 125 |
(defconstant +whitespace-char-string+
  (concatenate 'string
               (list #\Space #\Tab #\Linefeed #\Return #\Page))
  "The characters which are considered to be whitespace, collected
into one string: space, tab, linefeed, return, and page.  Same as
Perl's [\\s].")
|---|
| 129 |
|
|---|
| 130 |
(defun whitespacep (chr)
  "Checks the character CHR against the characters in
+WHITESPACE-CHAR-STRING+, i.e. tests whether it would match [\\s] in
Perl.  Returns the matching character \(which counts as true) or NIL
if there is none."
  (declare #.*special-optimize-settings*)
  (loop for candidate across +whitespace-char-string+
        when (char= chr candidate)
          return candidate))
|---|
| 135 |
|
|---|
| 136 |
(defmacro maybe-coerce-to-simple-string (string)
  "Expands into code which evaluates the form STRING exactly once and
returns its value unchanged if it satisfies SIMPLE-STRING-P.  Any
other value is handed to COERCE to obtain a SIMPLE-STRING."
  (with-unique-names (=value=)
    `(let ((,=value= ,string))
       (if (simple-string-p ,=value=)
           ,=value=
           (coerce ,=value= 'simple-string)))))
|---|
| 144 |
|
|---|
| 145 |
(declaim (inline nsubseq))
(defun nsubseq (sequence start &optional (end (length sequence)))
  "Returns the part of SEQUENCE from START \(inclusive) up to END
\(exclusive) without copying anything: the result is an array
displaced to SEQUENCE, so both share their storage and a modification
of one is visible in the other.  Because of this, SEQUENCE has to be a
vector \(for example a string), not a list.

END defaults to the length of SEQUENCE.  As with the standard sequence
functions, an explicit END of NIL also means \"up to the end of
SEQUENCE\"."
  ;; an explicitly supplied NIL bypasses the default above, so it has to
  ;; be resolved here before it reaches the subtraction
  (let ((end (or end (length sequence))))
    (make-array (- end start)
                :element-type (array-element-type sequence)
                :displaced-to sequence
                :displaced-index-offset start)))
|---|
| 152 |
|
|---|
| 153 |
(defun normalize-var-list (var-list)
  "Utility function for REGISTER-GROUPS-BIND and DO-REGISTER-GROUPS.
Expands the short form of VAR-LIST into the long form, a flat list of
\(FUNCTION VAR) entries.  An element which is a cons \(FN VAR1 VAR2 ...)
contributes one \(FN VAR) entry per variable, any other element VAR
contributes the single entry \(#'IDENTITY VAR)."
  (flet ((expand (element)
           ;; returns a fresh list with the long-form entries for ELEMENT
           (if (atom element)
               (list (list '(function identity) element))
               (let ((fn (first element)))
                 (mapcar #'(lambda (var) (list fn var))
                         (rest element))))))
    (mapcan #'expand var-list)))
|---|
| 163 |
|
|---|
| 164 |
(defun string-list-to-simple-string (string-list)
  "Returns a fresh simple string which holds the contents of all the
strings in the list STRING-LIST, in order."
  (declare #.*standard-optimize-settings*)
  ;; this function provided by JP Massar; note that we can't use APPLY
  ;; with CONCATENATE here because of CALL-ARGUMENTS-LIMIT
  (let ((needed 0)
        (offset 0))
    (declare (fixnum needed offset))
    ;; first pass: add up the lengths so the result is allocated only once
    (dolist (piece string-list)
      #-:genera (declare (string piece))
      (incf needed (length piece)))
    (let ((buffer (make-sequence 'simple-string needed)))
      ;; second pass: copy each string behind the previous one
      (dolist (piece string-list buffer)
        #-:genera (declare (string piece))
        (replace buffer piece :start1 offset)
        (incf offset (length piece))))))
|---|
| 182 |
|
|---|
| 183 |
(defun complement* (test-function)
  "Like COMPLEMENT but optimized for unary functions: returns a
closure which accepts one character and returns the logical negation
of what TEST-FUNCTION returns for it.  If TEST-FUNCTION is a function
object, the call is annotated with that type; any other value \(e.g. a
symbol naming a function) is handed to FUNCALL as is."
  (declare #.*standard-optimize-settings*)
  (if (functionp test-function)
      (lambda (chr)
        (declare (character chr))
        (not (funcall (the function test-function) chr)))
      (lambda (chr)
        (declare (character chr))
        (not (funcall test-function chr)))))
|---|