| 1 |
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*- |
|---|
| 2 |
;;; $Header: /usr/local/cvsrep/cl-ppcre/convert.lisp,v 1.54 2008/07/23 02:14:06 edi Exp $ |
|---|
| 3 |
|
|---|
| 4 |
;;; Here the parse tree is converted into its internal representation |
|---|
| 5 |
;;; using REGEX objects. At the same time some optimizations are |
|---|
| 6 |
;;; already applied. |
|---|
| 7 |
|
|---|
| 8 |
;;; Copyright (c) 2002-2008, Dr. Edmund Weitz. All rights reserved. |
|---|
| 9 |
|
|---|
| 10 |
;;; Redistribution and use in source and binary forms, with or without |
|---|
| 11 |
;;; modification, are permitted provided that the following conditions |
|---|
| 12 |
;;; are met: |
|---|
| 13 |
|
|---|
| 14 |
;;; * Redistributions of source code must retain the above copyright |
|---|
| 15 |
;;; notice, this list of conditions and the following disclaimer. |
|---|
| 16 |
|
|---|
| 17 |
;;; * Redistributions in binary form must reproduce the above |
|---|
| 18 |
;;; copyright notice, this list of conditions and the following |
|---|
| 19 |
;;; disclaimer in the documentation and/or other materials |
|---|
| 20 |
;;; provided with the distribution. |
|---|
| 21 |
|
|---|
| 22 |
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED |
|---|
| 23 |
;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
|---|
| 24 |
;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|---|
| 25 |
;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY |
|---|
| 26 |
;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
|---|
| 27 |
;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE |
|---|
| 28 |
;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
|---|
| 29 |
;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, |
|---|
| 30 |
;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
|---|
| 31 |
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
|---|
| 32 |
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|---|
| 33 |
|
|---|
| 34 |
(in-package :cl-ppcre) |
|---|
| 35 |
|
|---|
| 36 |
;;; The flags that represent the "ism" modifiers are always kept |
|---|
| 37 |
;;; together in a three-element list. We use the following macros to |
|---|
| 38 |
;;; access individual elements. |
|---|
| 39 |
|
|---|
| 40 |
(defmacro case-insensitive-mode-p (flags) |
|---|
| 41 |
"Accessor macro to extract the first flag out of a three-element flag list." |
|---|
| 42 |
`(first ,flags)) |
|---|
| 43 |
|
|---|
| 44 |
(defmacro multi-line-mode-p (flags) |
|---|
| 45 |
"Accessor macro to extract the second flag out of a three-element flag list." |
|---|
| 46 |
`(second ,flags)) |
|---|
| 47 |
|
|---|
| 48 |
(defmacro single-line-mode-p (flags) |
|---|
| 49 |
"Accessor macro to extract the third flag out of a three-element flag list." |
|---|
| 50 |
`(third ,flags)) |
|---|
| 51 |
|
|---|
| 52 |
(defun set-flag (token) |
|---|
| 53 |
"Reads a flag token and sets or unsets the corresponding entry in |
|---|
| 54 |
the special FLAGS list." |
|---|
| 55 |
(declare #.*standard-optimize-settings*) |
|---|
| 56 |
(declare (special flags)) |
|---|
| 57 |
(case token |
|---|
| 58 |
((:case-insensitive-p) |
|---|
| 59 |
(setf (case-insensitive-mode-p flags) t)) |
|---|
| 60 |
((:case-sensitive-p) |
|---|
| 61 |
(setf (case-insensitive-mode-p flags) nil)) |
|---|
| 62 |
((:multi-line-mode-p) |
|---|
| 63 |
(setf (multi-line-mode-p flags) t)) |
|---|
| 64 |
((:not-multi-line-mode-p) |
|---|
| 65 |
(setf (multi-line-mode-p flags) nil)) |
|---|
| 66 |
((:single-line-mode-p) |
|---|
| 67 |
(setf (single-line-mode-p flags) t)) |
|---|
| 68 |
((:not-single-line-mode-p) |
|---|
| 69 |
(setf (single-line-mode-p flags) nil)) |
|---|
| 70 |
(otherwise |
|---|
| 71 |
(signal-syntax-error "Unknown flag token ~A." token)))) |
|---|
| 72 |
|
|---|
| 73 |
(defgeneric resolve-property (property) |
|---|
| 74 |
(:documentation "Resolves PROPERTY to a unary character test |
|---|
| 75 |
function. PROPERTY can either be a function designator or it can be a |
|---|
| 76 |
string which is resolved using *PROPERTY-RESOLVER*.") |
|---|
| 77 |
(:method ((property-name string)) |
|---|
| 78 |
(funcall *property-resolver* property-name)) |
|---|
| 79 |
(:method ((function-name symbol)) |
|---|
| 80 |
function-name) |
|---|
| 81 |
(:method ((test-function function)) |
|---|
| 82 |
test-function)) |
|---|
| 83 |
|
|---|
| 84 |
(defun convert-char-class-to-test-function (list invertedp case-insensitive-p) |
|---|
| 85 |
"Combines all items in LIST into test function and returns a |
|---|
| 86 |
logical-OR combination of these functions. Items can be single |
|---|
| 87 |
characters, character ranges like \(:RANGE #\\A #\\E), or special |
|---|
| 88 |
character classes like :DIGIT-CLASS. Does the right thing with |
|---|
| 89 |
respect to case-\(in)sensitivity as specified by the special variable |
|---|
| 90 |
FLAGS." |
|---|
| 91 |
(declare #.*standard-optimize-settings*) |
|---|
| 92 |
(declare (special flags)) |
|---|
| 93 |
(let ((test-functions |
|---|
| 94 |
(loop for item in list |
|---|
| 95 |
collect (cond ((characterp item) |
|---|
| 96 |
;; rebind so closure captures the right one |
|---|
| 97 |
(let ((this-char item)) |
|---|
| 98 |
(lambda (char) |
|---|
| 99 |
(declare (character char this-char)) |
|---|
| 100 |
(char= char this-char)))) |
|---|
| 101 |
((symbolp item) |
|---|
| 102 |
(case item |
|---|
| 103 |
((:digit-class) #'digit-char-p) |
|---|
| 104 |
((:non-digit-class) (complement* #'digit-char-p)) |
|---|
| 105 |
((:whitespace-char-class) #'whitespacep) |
|---|
| 106 |
((:non-whitespace-char-class) (complement* #'whitespacep)) |
|---|
| 107 |
((:word-char-class) #'word-char-p) |
|---|
| 108 |
((:non-word-char-class) (complement* #'word-char-p)) |
|---|
| 109 |
(otherwise |
|---|
| 110 |
(signal-syntax-error "Unknown symbol ~A in character class." item)))) |
|---|
| 111 |
((and (consp item) |
|---|
| 112 |
(eq (first item) :property)) |
|---|
| 113 |
(resolve-property (second item))) |
|---|
| 114 |
((and (consp item) |
|---|
| 115 |
(eq (first item) :inverted-property)) |
|---|
| 116 |
(complement* (resolve-property (second item)))) |
|---|
| 117 |
((and (consp item) |
|---|
| 118 |
(eq (first item) :range)) |
|---|
| 119 |
(let ((from (second item)) |
|---|
| 120 |
(to (third item))) |
|---|
| 121 |
(when (char> from to) |
|---|
| 122 |
(signal-syntax-error "Invalid range from ~S to ~S in char-class." from to)) |
|---|
| 123 |
(lambda (char) |
|---|
| 124 |
(declare (character char from to)) |
|---|
| 125 |
(char<= from char to)))) |
|---|
| 126 |
(t (signal-syntax-error "Unknown item ~A in char-class list." item)))))) |
|---|
| 127 |
(unless test-functions |
|---|
| 128 |
(signal-syntax-error "Empty character class.")) |
|---|
| 129 |
(cond ((cdr test-functions) |
|---|
| 130 |
(cond ((and invertedp case-insensitive-p) |
|---|
| 131 |
(lambda (char) |
|---|
| 132 |
(declare (character char)) |
|---|
| 133 |
(loop with both-case-p = (both-case-p char) |
|---|
| 134 |
with char-down = (if both-case-p (char-downcase char) char) |
|---|
| 135 |
with char-up = (if both-case-p (char-upcase char) nil) |
|---|
| 136 |
for test-function in test-functions |
|---|
| 137 |
never (or (funcall test-function char-down) |
|---|
| 138 |
(and char-up (funcall test-function char-up)))))) |
|---|
| 139 |
(case-insensitive-p |
|---|
| 140 |
(lambda (char) |
|---|
| 141 |
(declare (character char)) |
|---|
| 142 |
(loop with both-case-p = (both-case-p char) |
|---|
| 143 |
with char-down = (if both-case-p (char-downcase char) char) |
|---|
| 144 |
with char-up = (if both-case-p (char-upcase char) nil) |
|---|
| 145 |
for test-function in test-functions |
|---|
| 146 |
thereis (or (funcall test-function char-down) |
|---|
| 147 |
(and char-up (funcall test-function char-up)))))) |
|---|
| 148 |
(invertedp |
|---|
| 149 |
(lambda (char) |
|---|
| 150 |
(loop for test-function in test-functions |
|---|
| 151 |
never (funcall test-function char)))) |
|---|
| 152 |
(t |
|---|
| 153 |
(lambda (char) |
|---|
| 154 |
(loop for test-function in test-functions |
|---|
| 155 |
thereis (funcall test-function char)))))) |
|---|
| 156 |
;; there's only one test-function |
|---|
| 157 |
(t (let ((test-function (first test-functions))) |
|---|
| 158 |
(cond ((and invertedp case-insensitive-p) |
|---|
| 159 |
(lambda (char) |
|---|
| 160 |
(declare (character char)) |
|---|
| 161 |
(not (or (funcall test-function (char-downcase char)) |
|---|
| 162 |
(and (both-case-p char) |
|---|
| 163 |
(funcall test-function (char-upcase char))))))) |
|---|
| 164 |
(case-insensitive-p |
|---|
| 165 |
(lambda (char) |
|---|
| 166 |
(declare (character char)) |
|---|
| 167 |
(or (funcall test-function (char-downcase char)) |
|---|
| 168 |
(and (both-case-p char) |
|---|
| 169 |
(funcall test-function (char-upcase char)))))) |
|---|
| 170 |
(invertedp (complement* test-function)) |
|---|
| 171 |
(t test-function))))))) |
|---|
| 172 |
|
|---|
| 173 |
(defun maybe-split-repetition (regex |
|---|
| 174 |
greedyp |
|---|
| 175 |
minimum |
|---|
| 176 |
maximum |
|---|
| 177 |
min-len |
|---|
| 178 |
length |
|---|
| 179 |
reg-seen) |
|---|
| 180 |
"Splits a REPETITION object into a constant and a varying part if |
|---|
| 181 |
applicable, i.e. something like |
|---|
| 182 |
a{3,} -> a{3}a* |
|---|
| 183 |
The arguments to this function correspond to the REPETITION slots of |
|---|
| 184 |
the same name." |
|---|
| 185 |
(declare #.*standard-optimize-settings*) |
|---|
| 186 |
(declare (fixnum minimum) |
|---|
| 187 |
((or fixnum null) maximum)) |
|---|
| 188 |
;; note the usage of COPY-REGEX here; we can't use the same REGEX |
|---|
| 189 |
;; object in both REPETITIONS because they will have different |
|---|
| 190 |
;; offsets |
|---|
| 191 |
(when maximum |
|---|
| 192 |
(when (zerop maximum) |
|---|
| 193 |
;; trivial case: don't repeat at all |
|---|
| 194 |
(return-from maybe-split-repetition |
|---|
| 195 |
(make-instance 'void))) |
|---|
| 196 |
(when (= 1 minimum maximum) |
|---|
| 197 |
;; another trivial case: "repeat" exactly once |
|---|
| 198 |
(return-from maybe-split-repetition |
|---|
| 199 |
regex))) |
|---|
| 200 |
;; first set up the constant part of the repetition |
|---|
| 201 |
;; maybe that's all we need |
|---|
| 202 |
(let ((constant-repetition (if (plusp minimum) |
|---|
| 203 |
(make-instance 'repetition |
|---|
| 204 |
:regex (copy-regex regex) |
|---|
| 205 |
:greedyp greedyp |
|---|
| 206 |
:minimum minimum |
|---|
| 207 |
:maximum minimum |
|---|
| 208 |
:min-len min-len |
|---|
| 209 |
:len length |
|---|
| 210 |
:contains-register-p reg-seen) |
|---|
| 211 |
;; don't create garbage if minimum is 0 |
|---|
| 212 |
nil))) |
|---|
| 213 |
(when (and maximum |
|---|
| 214 |
(= maximum minimum)) |
|---|
| 215 |
(return-from maybe-split-repetition |
|---|
| 216 |
;; no varying part needed because min = max |
|---|
| 217 |
constant-repetition)) |
|---|
| 218 |
;; now construct the varying part |
|---|
| 219 |
(let ((varying-repetition |
|---|
| 220 |
(make-instance 'repetition |
|---|
| 221 |
:regex regex |
|---|
| 222 |
:greedyp greedyp |
|---|
| 223 |
:minimum 0 |
|---|
| 224 |
:maximum (if maximum (- maximum minimum) nil) |
|---|
| 225 |
:min-len min-len |
|---|
| 226 |
:len length |
|---|
| 227 |
:contains-register-p reg-seen))) |
|---|
| 228 |
(cond ((zerop minimum) |
|---|
| 229 |
;; min = 0, no constant part needed |
|---|
| 230 |
varying-repetition) |
|---|
| 231 |
((= 1 minimum) |
|---|
| 232 |
;; min = 1, constant part needs no REPETITION wrapped around |
|---|
| 233 |
(make-instance 'seq |
|---|
| 234 |
:elements (list (copy-regex regex) |
|---|
| 235 |
varying-repetition))) |
|---|
| 236 |
(t |
|---|
| 237 |
;; general case |
|---|
| 238 |
(make-instance 'seq |
|---|
| 239 |
:elements (list constant-repetition |
|---|
| 240 |
varying-repetition))))))) |
|---|
| 241 |
|
|---|
| 242 |
;; During the conversion of the parse tree we keep track of the start |
|---|
| 243 |
;; of the parse tree in the special variable STARTS-WITH which'll |
|---|
| 244 |
;; either hold a STR object or an EVERYTHING object. The latter is the |
|---|
| 245 |
;; case if the regex starts with ".*" which implicitly anchors the |
|---|
| 246 |
;; regex at the start (perhaps modulo #\Newline). |
|---|
| 247 |
|
|---|
| 248 |
(defun maybe-accumulate (str) |
|---|
| 249 |
"Accumulate STR into the special variable STARTS-WITH if |
|---|
| 250 |
ACCUMULATE-START-P (also special) is true and STARTS-WITH is either |
|---|
| 251 |
NIL or a STR object of the same case mode. Always returns NIL." |
|---|
| 252 |
(declare #.*standard-optimize-settings*) |
|---|
| 253 |
(declare (special accumulate-start-p starts-with)) |
|---|
| 254 |
(declare (ftype (function (t) fixnum) len)) |
|---|
| 255 |
(when accumulate-start-p |
|---|
| 256 |
(etypecase starts-with |
|---|
| 257 |
(str |
|---|
| 258 |
;; STARTS-WITH already holds a STR, so we check if we can |
|---|
| 259 |
;; concatenate |
|---|
| 260 |
(cond ((eq (case-insensitive-p starts-with) |
|---|
| 261 |
(case-insensitive-p str)) |
|---|
| 262 |
;; we modify STARTS-WITH in place |
|---|
| 263 |
(setf (len starts-with) |
|---|
| 264 |
(+ (len starts-with) (len str))) |
|---|
| 265 |
;; note that we use SLOT-VALUE because the accessor |
|---|
| 266 |
;; STR has a declared FTYPE which doesn't fit here |
|---|
| 267 |
(adjust-array (slot-value starts-with 'str) |
|---|
| 268 |
(len starts-with) |
|---|
| 269 |
:fill-pointer t) |
|---|
| 270 |
(setf (subseq (slot-value starts-with 'str) |
|---|
| 271 |
(- (len starts-with) (len str))) |
|---|
| 272 |
(str str) |
|---|
| 273 |
;; STR objects that are parts of STARTS-WITH |
|---|
| 274 |
;; always have their SKIP slot set to true |
|---|
| 275 |
;; because the SCAN function will take care of |
|---|
| 276 |
;; them, i.e. the matcher can ignore them |
|---|
| 277 |
(skip str) t)) |
|---|
| 278 |
(t (setq accumulate-start-p nil)))) |
|---|
| 279 |
(null |
|---|
| 280 |
;; STARTS-WITH is still empty, so we create a new STR object |
|---|
| 281 |
(setf starts-with |
|---|
| 282 |
(make-instance 'str |
|---|
| 283 |
:str "" |
|---|
| 284 |
:case-insensitive-p (case-insensitive-p str)) |
|---|
| 285 |
;; INITIALIZE-INSTANCE will coerce the STR to a simple |
|---|
| 286 |
;; string, so we have to fill it afterwards |
|---|
| 287 |
(slot-value starts-with 'str) |
|---|
| 288 |
(make-array (len str) |
|---|
| 289 |
:initial-contents (str str) |
|---|
| 290 |
:element-type 'character |
|---|
| 291 |
:fill-pointer t |
|---|
| 292 |
:adjustable t) |
|---|
| 293 |
(len starts-with) |
|---|
| 294 |
(len str) |
|---|
| 295 |
;; see remark about SKIP above |
|---|
| 296 |
(skip str) t)) |
|---|
| 297 |
(everything |
|---|
| 298 |
;; STARTS-WITH already holds an EVERYTHING object - we can't |
|---|
| 299 |
;; concatenate |
|---|
| 300 |
(setq accumulate-start-p nil)))) |
|---|
| 301 |
nil) |
|---|
| 302 |
|
|---|
| 303 |
(declaim (inline convert-aux)) |
|---|
| 304 |
(defun convert-aux (parse-tree) |
|---|
| 305 |
"Converts the parse tree PARSE-TREE into a REGEX object and returns |
|---|
| 306 |
it. Will also |
|---|
| 307 |
|
|---|
| 308 |
- split and optimize repetitions, |
|---|
| 309 |
- accumulate strings or EVERYTHING objects into the special variable |
|---|
| 310 |
STARTS-WITH, |
|---|
| 311 |
- keep track of all registers seen in the special variable REG-NUM, |
|---|
| 312 |
- keep track of all named registers seen in the special variable REG-NAMES |
|---|
| 313 |
- keep track of the highest backreference seen in the special |
|---|
| 314 |
variable MAX-BACK-REF, |
|---|
| 315 |
- maintain and adher to the currently applicable modifiers in the special |
|---|
| 316 |
variable FLAGS, and |
|---|
| 317 |
- maybe even wash your car..." |
|---|
| 318 |
(declare #.*standard-optimize-settings*) |
|---|
| 319 |
(if (consp parse-tree) |
|---|
| 320 |
(convert-compound-parse-tree (first parse-tree) parse-tree) |
|---|
| 321 |
(convert-simple-parse-tree parse-tree))) |
|---|
| 322 |
|
|---|
| 323 |
(defgeneric convert-compound-parse-tree (token parse-tree &key) |
|---|
| 324 |
(declare #.*standard-optimize-settings*) |
|---|
| 325 |
(:documentation "Helper function for CONVERT-AUX which converts |
|---|
| 326 |
parse trees which are conses and dispatches on TOKEN which is the |
|---|
| 327 |
first element of the parse tree.") |
|---|
| 328 |
(:method (token parse-tree &key) |
|---|
| 329 |
(signal-syntax-error "Unknown token ~A in parse-tree." token))) |
|---|
| 330 |
|
|---|
| 331 |
(defmethod convert-compound-parse-tree ((token (eql :sequence)) parse-tree &key) |
|---|
| 332 |
"The case for parse trees like \(:SEQUENCE {<regex>}*)." |
|---|
| 333 |
(declare #.*standard-optimize-settings*) |
|---|
| 334 |
(cond ((cddr parse-tree) |
|---|
| 335 |
;; this is essentially like |
|---|
| 336 |
;; (MAPCAR 'CONVERT-AUX (REST PARSE-TREE)) |
|---|
| 337 |
;; but we don't cons a new list |
|---|
| 338 |
(loop for parse-tree-rest on (rest parse-tree) |
|---|
| 339 |
while parse-tree-rest |
|---|
| 340 |
do (setf (car parse-tree-rest) |
|---|
| 341 |
(convert-aux (car parse-tree-rest)))) |
|---|
| 342 |
(make-instance 'seq :elements (rest parse-tree))) |
|---|
| 343 |
(t (convert-aux (second parse-tree))))) |
|---|
| 344 |
|
|---|
| 345 |
(defmethod convert-compound-parse-tree ((token (eql :group)) parse-tree &key) |
|---|
| 346 |
"The case for parse trees like \(:GROUP {<regex>}*). |
|---|
| 347 |
|
|---|
| 348 |
This is a syntactical construct equivalent to :SEQUENCE intended to |
|---|
| 349 |
keep the effect of modifiers local." |
|---|
| 350 |
(declare #.*standard-optimize-settings*) |
|---|
| 351 |
(declare (special flags)) |
|---|
| 352 |
;; make a local copy of FLAGS and shadow the global value while we |
|---|
| 353 |
;; descend into the enclosed regexes |
|---|
| 354 |
(let ((flags (copy-list flags))) |
|---|
| 355 |
(declare (special flags)) |
|---|
| 356 |
(cond ((cddr parse-tree) |
|---|
| 357 |
(loop for parse-tree-rest on (rest parse-tree) |
|---|
| 358 |
while parse-tree-rest |
|---|
| 359 |
do (setf (car parse-tree-rest) |
|---|
| 360 |
(convert-aux (car parse-tree-rest)))) |
|---|
| 361 |
(make-instance 'seq :elements (rest parse-tree))) |
|---|
| 362 |
(t (convert-aux (second parse-tree)))))) |
|---|
| 363 |
|
|---|
| 364 |
(defmethod convert-compound-parse-tree ((token (eql :alternation)) parse-tree &key) |
|---|
| 365 |
"The case for \(:ALTERNATION {<regex>}*)." |
|---|
| 366 |
(declare #.*standard-optimize-settings*) |
|---|
| 367 |
(declare (special accumulate-start-p)) |
|---|
| 368 |
;; we must stop accumulating objects into STARTS-WITH once we reach |
|---|
| 369 |
;; an alternation |
|---|
| 370 |
(setq accumulate-start-p nil) |
|---|
| 371 |
(loop for parse-tree-rest on (rest parse-tree) |
|---|
| 372 |
while parse-tree-rest |
|---|
| 373 |
do (setf (car parse-tree-rest) |
|---|
| 374 |
(convert-aux (car parse-tree-rest)))) |
|---|
| 375 |
(make-instance 'alternation :choices (rest parse-tree))) |
|---|
| 376 |
|
|---|
| 377 |
(defmethod convert-compound-parse-tree ((token (eql :branch)) parse-tree &key) |
|---|
| 378 |
"The case for \(:BRANCH <test> <regex>). |
|---|
| 379 |
|
|---|
| 380 |
Here, <test> must be look-ahead, look-behind or number; if <regex> is |
|---|
| 381 |
an alternation it must have one or two choices." |
|---|
| 382 |
(declare #.*standard-optimize-settings*) |
|---|
| 383 |
(declare (special accumulate-start-p)) |
|---|
| 384 |
(setq accumulate-start-p nil) |
|---|
| 385 |
(let* ((test-candidate (second parse-tree)) |
|---|
| 386 |
(test (cond ((numberp test-candidate) |
|---|
| 387 |
(when (zerop (the fixnum test-candidate)) |
|---|
| 388 |
(signal-syntax-error "Register 0 doesn't exist: ~S." parse-tree)) |
|---|
| 389 |
(1- (the fixnum test-candidate))) |
|---|
| 390 |
(t (convert-aux test-candidate)))) |
|---|
| 391 |
(alternations (convert-aux (third parse-tree)))) |
|---|
| 392 |
(when (and (not (numberp test)) |
|---|
| 393 |
(not (typep test 'lookahead)) |
|---|
| 394 |
(not (typep test 'lookbehind))) |
|---|
| 395 |
(signal-syntax-error "Branch test must be look-ahead, look-behind or number: ~S." parse-tree)) |
|---|
| 396 |
(typecase alternations |
|---|
| 397 |
(alternation |
|---|
| 398 |
(case (length (choices alternations)) |
|---|
| 399 |
((0) |
|---|
| 400 |
(signal-syntax-error "No choices in branch: ~S." parse-tree)) |
|---|
| 401 |
((1) |
|---|
| 402 |
(make-instance 'branch |
|---|
| 403 |
:test test |
|---|
| 404 |
:then-regex (first |
|---|
| 405 |
(choices alternations)))) |
|---|
| 406 |
((2) |
|---|
| 407 |
(make-instance 'branch |
|---|
| 408 |
:test test |
|---|
| 409 |
:then-regex (first |
|---|
| 410 |
(choices alternations)) |
|---|
| 411 |
:else-regex (second |
|---|
| 412 |
(choices alternations)))) |
|---|
| 413 |
(otherwise |
|---|
| 414 |
(signal-syntax-error "Too much choices in branch: ~S." parse-tree)))) |
|---|
| 415 |
(t |
|---|
| 416 |
(make-instance 'branch |
|---|
| 417 |
:test test |
|---|
| 418 |
:then-regex alternations))))) |
|---|
| 419 |
|
|---|
| 420 |
(defmethod convert-compound-parse-tree ((token (eql :positive-lookahead)) parse-tree &key) |
|---|
| 421 |
"The case for \(:POSITIVE-LOOKAHEAD <regex>)." |
|---|
| 422 |
(declare #.*standard-optimize-settings*) |
|---|
| 423 |
(declare (special flags accumulate-start-p)) |
|---|
| 424 |
;; keep the effect of modifiers local to the enclosed regex and stop |
|---|
| 425 |
;; accumulating into STARTS-WITH |
|---|
| 426 |
(setq accumulate-start-p nil) |
|---|
| 427 |
(let ((flags (copy-list flags))) |
|---|
| 428 |
(declare (special flags)) |
|---|
| 429 |
(make-instance 'lookahead |
|---|
| 430 |
:regex (convert-aux (second parse-tree)) |
|---|
| 431 |
:positivep t))) |
|---|
| 432 |
|
|---|
| 433 |
(defmethod convert-compound-parse-tree ((token (eql :negative-lookahead)) parse-tree &key) |
|---|
| 434 |
"The case for \(:NEGATIVE-LOOKAHEAD <regex>)." |
|---|
| 435 |
(declare #.*standard-optimize-settings*) |
|---|
| 436 |
;; do the same as for positive look-aheads and just switch afterwards |
|---|
| 437 |
(let ((regex (convert-compound-parse-tree :positive-lookahead parse-tree))) |
|---|
| 438 |
(setf (slot-value regex 'positivep) nil) |
|---|
| 439 |
regex)) |
|---|
| 440 |
|
|---|
| 441 |
(defmethod convert-compound-parse-tree ((token (eql :positive-lookbehind)) parse-tree &key) |
|---|
| 442 |
"The case for \(:POSITIVE-LOOKBEHIND <regex>)." |
|---|
| 443 |
(declare #.*standard-optimize-settings*) |
|---|
| 444 |
(declare (special flags accumulate-start-p)) |
|---|
| 445 |
;; keep the effect of modifiers local to the enclosed regex and stop |
|---|
| 446 |
;; accumulating into STARTS-WITH |
|---|
| 447 |
(setq accumulate-start-p nil) |
|---|
| 448 |
(let* ((flags (copy-list flags)) |
|---|
| 449 |
(regex (convert-aux (second parse-tree))) |
|---|
| 450 |
(len (regex-length regex))) |
|---|
| 451 |
(declare (special flags)) |
|---|
| 452 |
;; lookbehind assertions must be of fixed length |
|---|
| 453 |
(unless len |
|---|
| 454 |
(signal-syntax-error "Variable length look-behind not implemented \(yet): ~S." parse-tree)) |
|---|
| 455 |
(make-instance 'lookbehind |
|---|
| 456 |
:regex regex |
|---|
| 457 |
:positivep t |
|---|
| 458 |
:len len))) |
|---|
| 459 |
|
|---|
| 460 |
(defmethod convert-compound-parse-tree ((token (eql :negative-lookbehind)) parse-tree &key) |
|---|
| 461 |
"The case for \(:NEGATIVE-LOOKBEHIND <regex>)." |
|---|
| 462 |
(declare #.*standard-optimize-settings*) |
|---|
| 463 |
;; do the same as for positive look-behinds and just switch afterwards |
|---|
| 464 |
(let ((regex (convert-compound-parse-tree :positive-lookbehind parse-tree))) |
|---|
| 465 |
(setf (slot-value regex 'positivep) nil) |
|---|
| 466 |
regex)) |
|---|
| 467 |
|
|---|
| 468 |
(defmethod convert-compound-parse-tree ((token (eql :greedy-repetition)) parse-tree &key (greedyp t)) |
|---|
| 469 |
"The case for \(:GREEDY-REPETITION|:NON-GREEDY-REPETITION <min> <max> <regex>). |
|---|
| 470 |
|
|---|
| 471 |
This function is also used for the non-greedy case in which case it is |
|---|
| 472 |
called with GREEDYP set to NIL as you would expect." |
|---|
| 473 |
(declare #.*standard-optimize-settings*) |
|---|
| 474 |
(declare (special accumulate-start-p starts-with)) |
|---|
| 475 |
;; remember the value of ACCUMULATE-START-P upon entering |
|---|
| 476 |
(let ((local-accumulate-start-p accumulate-start-p)) |
|---|
| 477 |
(let ((minimum (second parse-tree)) |
|---|
| 478 |
(maximum (third parse-tree))) |
|---|
| 479 |
(declare (fixnum minimum)) |
|---|
| 480 |
(declare (type (or null fixnum) maximum)) |
|---|
| 481 |
(unless (and maximum |
|---|
| 482 |
(= 1 minimum maximum)) |
|---|
| 483 |
;; set ACCUMULATE-START-P to NIL for the rest of |
|---|
| 484 |
;; the conversion because we can't continue to |
|---|
| 485 |
;; accumulate inside as well as after a proper |
|---|
| 486 |
;; repetition |
|---|
| 487 |
(setq accumulate-start-p nil)) |
|---|
| 488 |
(let* (reg-seen |
|---|
| 489 |
(regex (convert-aux (fourth parse-tree))) |
|---|
| 490 |
(min-len (regex-min-length regex)) |
|---|
| 491 |
(length (regex-length regex))) |
|---|
| 492 |
;; note that this declaration already applies to |
|---|
| 493 |
;; the call to CONVERT-AUX above |
|---|
| 494 |
(declare (special reg-seen)) |
|---|
| 495 |
(when (and local-accumulate-start-p |
|---|
| 496 |
(not starts-with) |
|---|
| 497 |
(zerop minimum) |
|---|
| 498 |
(not maximum)) |
|---|
| 499 |
;; if this repetition is (equivalent to) ".*" |
|---|
| 500 |
;; and if we're at the start of the regex we |
|---|
| 501 |
;; remember it for ADVANCE-FN (see the SCAN |
|---|
| 502 |
;; function) |
|---|
| 503 |
(setq starts-with (everythingp regex))) |
|---|
| 504 |
(if (or (not reg-seen) |
|---|
| 505 |
(not greedyp) |
|---|
| 506 |
(not length) |
|---|
| 507 |
(zerop length) |
|---|
| 508 |
(and maximum (= minimum maximum))) |
|---|
| 509 |
;; the repetition doesn't enclose a register, or |
|---|
| 510 |
;; it's not greedy, or we can't determine it's |
|---|
| 511 |
;; (inner) length, or the length is zero, or the |
|---|
| 512 |
;; number of repetitions is fixed; in all of |
|---|
| 513 |
;; these cases we don't bother to optimize |
|---|
| 514 |
(maybe-split-repetition regex |
|---|
| 515 |
greedyp |
|---|
| 516 |
minimum |
|---|
| 517 |
maximum |
|---|
| 518 |
min-len |
|---|
| 519 |
length |
|---|
| 520 |
reg-seen) |
|---|
| 521 |
;; otherwise we make a transformation that looks |
|---|
| 522 |
;; roughly like one of |
|---|
| 523 |
;; <regex>* -> (?:<regex'>*<regex>)? |
|---|
| 524 |
;; <regex>+ -> <regex'>*<regex> |
|---|
| 525 |
;; where the trick is that as much as possible |
|---|
| 526 |
;; registers from <regex> are removed in |
|---|
| 527 |
;; <regex'> |
|---|
| 528 |
(let* (reg-seen ; new instance for REMOVE-REGISTERS |
|---|
| 529 |
(remove-registers-p t) |
|---|
| 530 |
(inner-regex (remove-registers regex)) |
|---|
| 531 |
(inner-repetition |
|---|
| 532 |
;; this is the "<regex'>" part |
|---|
| 533 |
(maybe-split-repetition inner-regex |
|---|
| 534 |
;; always greedy |
|---|
| 535 |
t |
|---|
| 536 |
;; reduce minimum by 1 |
|---|
| 537 |
;; unless it's already 0 |
|---|
| 538 |
(if (zerop minimum) |
|---|
| 539 |
0 |
|---|
| 540 |
(1- minimum)) |
|---|
| 541 |
;; reduce maximum by 1 |
|---|
| 542 |
;; unless it's NIL |
|---|
| 543 |
(and maximum |
|---|
| 544 |
(1- maximum)) |
|---|
| 545 |
min-len |
|---|
| 546 |
length |
|---|
| 547 |
reg-seen)) |
|---|
| 548 |
(inner-seq |
|---|
| 549 |
;; this is the "<regex'>*<regex>" part |
|---|
| 550 |
(make-instance 'seq |
|---|
| 551 |
:elements (list inner-repetition |
|---|
| 552 |
regex)))) |
|---|
| 553 |
;; note that this declaration already applies |
|---|
| 554 |
;; to the call to REMOVE-REGISTERS above |
|---|
| 555 |
(declare (special remove-registers-p reg-seen)) |
|---|
| 556 |
;; wrap INNER-SEQ with a greedy |
|---|
| 557 |
;; {0,1}-repetition (i.e. "?") if necessary |
|---|
| 558 |
(if (plusp minimum) |
|---|
| 559 |
inner-seq |
|---|
| 560 |
(maybe-split-repetition inner-seq |
|---|
| 561 |
t |
|---|
| 562 |
0 |
|---|
| 563 |
1 |
|---|
| 564 |
min-len |
|---|
| 565 |
nil |
|---|
| 566 |
t)))))))) |
|---|
| 567 |
|
|---|
| 568 |
(defmethod convert-compound-parse-tree ((token (eql :non-greedy-repetition)) parse-tree &key) |
|---|
| 569 |
"The case for \(:NON-GREEDY-REPETITION <min> <max> <regex>)." |
|---|
| 570 |
(declare #.*standard-optimize-settings*) |
|---|
| 571 |
;; just dispatch to the method above with GREEDYP explicitly set to NIL |
|---|
| 572 |
(convert-compound-parse-tree :greedy-repetition parse-tree :greedyp nil)) |
|---|
| 573 |
|
|---|
| 574 |
(defmethod convert-compound-parse-tree ((token (eql :register)) parse-tree &key name) |
|---|
| 575 |
"The case for \(:REGISTER <regex>). Also used for named registers |
|---|
| 576 |
when NAME is not NIL." |
|---|
| 577 |
(declare #.*standard-optimize-settings*) |
|---|
| 578 |
(declare (special flags reg-num reg-names)) |
|---|
| 579 |
;; keep the effect of modifiers local to the enclosed regex; also, |
|---|
| 580 |
;; assign the current value of REG-NUM to the corresponding slot of |
|---|
| 581 |
;; the REGISTER object and increase this counter afterwards; for |
|---|
| 582 |
;; named register update REG-NAMES and set the corresponding name |
|---|
| 583 |
;; slot of the REGISTER object too |
|---|
| 584 |
(let ((flags (copy-list flags)) |
|---|
| 585 |
(stored-reg-num reg-num)) |
|---|
| 586 |
(declare (special flags reg-seen named-reg-seen)) |
|---|
| 587 |
(setq reg-seen t) |
|---|
| 588 |
(when name (setq named-reg-seen t)) |
|---|
| 589 |
(incf (the fixnum reg-num)) |
|---|
| 590 |
(push name reg-names) |
|---|
| 591 |
(make-instance 'register |
|---|
| 592 |
:regex (convert-aux (if name (third parse-tree) (second parse-tree))) |
|---|
| 593 |
:num stored-reg-num |
|---|
| 594 |
:name name))) |
|---|
| 595 |
|
|---|
| 596 |
(defmethod convert-compound-parse-tree ((token (eql :named-register)) parse-tree &key) |
|---|
| 597 |
"The case for \(:NAMED-REGISTER <regex>)." |
|---|
| 598 |
(declare #.*standard-optimize-settings*) |
|---|
| 599 |
;; call the method above and use the :NAME keyword argument |
|---|
| 600 |
(convert-compound-parse-tree :register parse-tree :name (copy-seq (second parse-tree)))) |
|---|
| 601 |
|
|---|
| 602 |
(defmethod convert-compound-parse-tree ((token (eql :filter)) parse-tree &key) |
|---|
| 603 |
"The case for \(:FILTER <function> &optional <length>)." |
|---|
| 604 |
(declare #.*standard-optimize-settings*) |
|---|
| 605 |
(declare (special accumulate-start-p)) |
|---|
| 606 |
;; stop accumulating into STARTS-WITH |
|---|
| 607 |
(setq accumulate-start-p nil) |
|---|
| 608 |
(make-instance 'filter |
|---|
| 609 |
:fn (second parse-tree) |
|---|
| 610 |
:len (third parse-tree))) |
|---|
| 611 |
|
|---|
| 612 |
(defmethod convert-compound-parse-tree ((token (eql :standalone)) parse-tree &key) |
|---|
| 613 |
"The case for \(:STANDALONE <regex>)." |
|---|
| 614 |
(declare #.*standard-optimize-settings*) |
|---|
| 615 |
(declare (special flags accumulate-start-p)) |
|---|
| 616 |
;; stop accumulating into STARTS-WITH |
|---|
| 617 |
(setq accumulate-start-p nil) |
|---|
| 618 |
;; keep the effect of modifiers local to the enclosed regex |
|---|
| 619 |
(let ((flags (copy-list flags))) |
|---|
| 620 |
(declare (special flags)) |
|---|
| 621 |
(make-instance 'standalone :regex (convert-aux (second parse-tree))))) |
|---|
| 622 |
|
|---|
| 623 |
(defmethod convert-compound-parse-tree ((token (eql :back-reference)) parse-tree &key) |
|---|
| 624 |
"The case for \(:BACK-REFERENCE <number>|<name>)." |
|---|
| 625 |
(declare #.*standard-optimize-settings*) |
|---|
| 626 |
(declare (special flags accumulate-start-p reg-num reg-names max-back-ref)) |
|---|
| 627 |
(let* ((backref-name (and (stringp (second parse-tree)) |
|---|
| 628 |
(second parse-tree))) |
|---|
| 629 |
(referred-regs |
|---|
| 630 |
(when backref-name |
|---|
| 631 |
;; find which register corresponds to the given name |
|---|
| 632 |
;; we have to deal with case where several registers share |
|---|
| 633 |
;; the same name and collect their respective numbers |
|---|
| 634 |
(loop for name in reg-names |
|---|
| 635 |
for reg-index from 0 |
|---|
| 636 |
when (string= name backref-name) |
|---|
| 637 |
;; NOTE: REG-NAMES stores register names in reversed |
|---|
| 638 |
;; order REG-NUM contains number of (any) registers |
|---|
| 639 |
;; seen so far; 1- will be done later |
|---|
| 640 |
collect (- reg-num reg-index)))) |
|---|
| 641 |
;; store the register number for the simple case |
|---|
| 642 |
(backref-number (or (first referred-regs) (second parse-tree)))) |
|---|
| 643 |
(declare (type (or fixnum null) backref-number)) |
|---|
| 644 |
(when (or (not (typep backref-number 'fixnum)) |
|---|
| 645 |
(<= backref-number 0)) |
|---|
| 646 |
(signal-syntax-error "Illegal back-reference: ~S." parse-tree)) |
|---|
| 647 |
;; stop accumulating into STARTS-WITH and increase MAX-BACK-REF if |
|---|
| 648 |
;; necessary |
|---|
| 649 |
(setq accumulate-start-p nil |
|---|
| 650 |
max-back-ref (max (the fixnum max-back-ref) |
|---|
| 651 |
backref-number)) |
|---|
| 652 |
(flet ((make-back-ref (backref-number) |
|---|
| 653 |
(make-instance 'back-reference |
|---|
| 654 |
;; we start counting from 0 internally |
|---|
| 655 |
:num (1- backref-number) |
|---|
| 656 |
:case-insensitive-p (case-insensitive-mode-p flags) |
|---|
| 657 |
;; backref-name is NIL or string, safe to copy |
|---|
| 658 |
:name (copy-seq backref-name)))) |
|---|
| 659 |
(cond |
|---|
| 660 |
((cdr referred-regs) |
|---|
| 661 |
;; several registers share the same name we will try to match |
|---|
| 662 |
;; any of them, starting with the most recent first |
|---|
| 663 |
;; alternation is used to accomplish matching |
|---|
| 664 |
(make-instance 'alternation |
|---|
| 665 |
:choices (loop |
|---|
| 666 |
for reg-index in referred-regs |
|---|
| 667 |
collect (make-back-ref reg-index)))) |
|---|
| 668 |
;; simple case - backref corresponds to only one register |
|---|
| 669 |
(t |
|---|
| 670 |
(make-back-ref backref-number)))))) |
|---|
| 671 |
|
|---|
| 672 |
(defmethod convert-compound-parse-tree ((token (eql :regex)) parse-tree &key) |
|---|
| 673 |
"The case for \(:REGEX <string>)." |
|---|
| 674 |
(declare #.*standard-optimize-settings*) |
|---|
| 675 |
(convert-aux (parse-string (second parse-tree)))) |
|---|
| 676 |
|
|---|
| 677 |
(defmethod convert-compound-parse-tree ((token (eql :char-class)) parse-tree &key invertedp) |
|---|
| 678 |
"The case for \(:CHAR-CLASS {<item>}*) where item is one of |
|---|
| 679 |
|
|---|
| 680 |
- a character, |
|---|
| 681 |
- a character range: \(:RANGE <char1> <char2>), or |
|---|
| 682 |
- a special char class symbol like :DIGIT-CHAR-CLASS. |
|---|
| 683 |
|
|---|
| 684 |
Also used for inverted char classes when INVERTEDP is true." |
|---|
| 685 |
(declare #.*standard-optimize-settings*) |
|---|
| 686 |
(declare (special flags accumulate-start-p)) |
|---|
| 687 |
(let ((test-function |
|---|
| 688 |
(create-optimized-test-function |
|---|
| 689 |
(convert-char-class-to-test-function (rest parse-tree) |
|---|
| 690 |
invertedp |
|---|
| 691 |
(case-insensitive-mode-p flags))))) |
|---|
| 692 |
(setq accumulate-start-p nil) |
|---|
| 693 |
(make-instance 'char-class :test-function test-function))) |
|---|
| 694 |
|
|---|
| 695 |
(defmethod convert-compound-parse-tree ((token (eql :inverted-char-class)) parse-tree &key) |
|---|
| 696 |
"The case for \(:INVERTED-CHAR-CLASS {<item>}*)." |
|---|
| 697 |
(declare #.*standard-optimize-settings*) |
|---|
| 698 |
;; just dispatch to the "real" method |
|---|
| 699 |
(convert-compound-parse-tree :char-class parse-tree :invertedp t)) |
|---|
| 700 |
|
|---|
| 701 |
(defmethod convert-compound-parse-tree ((token (eql :property)) parse-tree &key) |
|---|
| 702 |
"The case for \(:PROPERTY <name>) where <name> is a string." |
|---|
| 703 |
(declare #.*standard-optimize-settings*) |
|---|
| 704 |
(make-instance 'char-class :test-function (resolve-property (second parse-tree)))) |
|---|
| 705 |
|
|---|
| 706 |
(defmethod convert-compound-parse-tree ((token (eql :inverted-property)) parse-tree &key) |
|---|
| 707 |
"The case for \(:INVERTED-PROPERTY <name>) where <name> is a string." |
|---|
| 708 |
(declare #.*standard-optimize-settings*) |
|---|
| 709 |
(make-instance 'char-class :test-function (complement* (resolve-property (second parse-tree))))) |
|---|
| 710 |
|
|---|
| 711 |
(defmethod convert-compound-parse-tree ((token (eql :flags)) parse-tree &key) |
|---|
| 712 |
"The case for \(:FLAGS {<flag>}*) where flag is a modifier symbol |
|---|
| 713 |
like :CASE-INSENSITIVE-P." |
|---|
| 714 |
(declare #.*standard-optimize-settings*) |
|---|
| 715 |
;; set/unset the flags corresponding to the symbols |
|---|
| 716 |
;; following :FLAGS |
|---|
| 717 |
(mapc #'set-flag (rest parse-tree)) |
|---|
| 718 |
;; we're only interested in the side effect of |
|---|
| 719 |
;; setting/unsetting the flags and turn this syntactical |
|---|
| 720 |
;; construct into a VOID object which'll be optimized |
|---|
| 721 |
;; away when creating the matcher |
|---|
| 722 |
(make-instance 'void)) |
|---|
| 723 |
|
|---|
| 724 |
(defgeneric convert-simple-parse-tree (parse-tree) |
|---|
| 725 |
(declare #.*standard-optimize-settings*) |
|---|
| 726 |
(:documentation "Helper function for CONVERT-AUX which converts |
|---|
| 727 |
parse trees which are atoms.") |
|---|
| 728 |
(:method ((parse-tree (eql :void))) |
|---|
| 729 |
(declare #.*standard-optimize-settings*) |
|---|
| 730 |
(make-instance 'void)) |
|---|
| 731 |
(:method ((parse-tree (eql :word-boundary))) |
|---|
| 732 |
(declare #.*standard-optimize-settings*) |
|---|
| 733 |
(make-instance 'word-boundary :negatedp nil)) |
|---|
| 734 |
(:method ((parse-tree (eql :non-word-boundary))) |
|---|
| 735 |
(declare #.*standard-optimize-settings*) |
|---|
| 736 |
(make-instance 'word-boundary :negatedp t)) |
|---|
| 737 |
(:method ((parse-tree (eql :everything))) |
|---|
| 738 |
(declare #.*standard-optimize-settings*) |
|---|
| 739 |
(declare (special flags accumulate-start-p)) |
|---|
| 740 |
(setq accumulate-start-p nil) |
|---|
| 741 |
(make-instance 'everything :single-line-p (single-line-mode-p flags))) |
|---|
| 742 |
(:method ((parse-tree (eql :digit-class))) |
|---|
| 743 |
(declare #.*standard-optimize-settings*) |
|---|
| 744 |
(declare (special accumulate-start-p)) |
|---|
| 745 |
(setq accumulate-start-p nil) |
|---|
| 746 |
(make-instance 'char-class :test-function #'digit-char-p)) |
|---|
| 747 |
(:method ((parse-tree (eql :word-char-class))) |
|---|
| 748 |
(declare #.*standard-optimize-settings*) |
|---|
| 749 |
(declare (special accumulate-start-p)) |
|---|
| 750 |
(setq accumulate-start-p nil) |
|---|
| 751 |
(make-instance 'char-class :test-function #'word-char-p)) |
|---|
| 752 |
(:method ((parse-tree (eql :whitespace-char-class))) |
|---|
| 753 |
(declare #.*standard-optimize-settings*) |
|---|
| 754 |
(declare (special accumulate-start-p)) |
|---|
| 755 |
(setq accumulate-start-p nil) |
|---|
| 756 |
(make-instance 'char-class :test-function #'whitespacep)) |
|---|
| 757 |
(:method ((parse-tree (eql :non-digit-class))) |
|---|
| 758 |
(declare #.*standard-optimize-settings*) |
|---|
| 759 |
(declare (special accumulate-start-p)) |
|---|
| 760 |
(setq accumulate-start-p nil) |
|---|
| 761 |
(make-instance 'char-class :test-function (complement* #'digit-char-p))) |
|---|
| 762 |
(:method ((parse-tree (eql :non-word-char-class))) |
|---|
| 763 |
(declare #.*standard-optimize-settings*) |
|---|
| 764 |
(declare (special accumulate-start-p)) |
|---|
| 765 |
(setq accumulate-start-p nil) |
|---|
| 766 |
(make-instance 'char-class :test-function (complement* #'word-char-p))) |
|---|
| 767 |
(:method ((parse-tree (eql :non-whitespace-char-class))) |
|---|
| 768 |
(declare #.*standard-optimize-settings*) |
|---|
| 769 |
(declare (special accumulate-start-p)) |
|---|
| 770 |
(setq accumulate-start-p nil) |
|---|
| 771 |
(make-instance 'char-class :test-function (complement* #'whitespacep))) |
|---|
| 772 |
(:method ((parse-tree (eql :start-anchor))) |
|---|
| 773 |
;; Perl's "^" |
|---|
| 774 |
(declare #.*standard-optimize-settings*) |
|---|
| 775 |
(declare (special flags)) |
|---|
| 776 |
(make-instance 'anchor :startp t :multi-line-p (multi-line-mode-p flags))) |
|---|
| 777 |
(:method ((parse-tree (eql :end-anchor))) |
|---|
| 778 |
;; Perl's "$" |
|---|
| 779 |
(declare #.*standard-optimize-settings*) |
|---|
| 780 |
(declare (special flags)) |
|---|
| 781 |
(make-instance 'anchor :startp nil :multi-line-p (multi-line-mode-p flags))) |
|---|
| 782 |
(:method ((parse-tree (eql :modeless-start-anchor))) |
|---|
| 783 |
;; Perl's "\A" |
|---|
| 784 |
(declare #.*standard-optimize-settings*) |
|---|
| 785 |
(make-instance 'anchor :startp t)) |
|---|
| 786 |
(:method ((parse-tree (eql :modeless-end-anchor))) |
|---|
| 787 |
;; Perl's "$\Z" |
|---|
| 788 |
(declare #.*standard-optimize-settings*) |
|---|
| 789 |
(make-instance 'anchor :startp nil)) |
|---|
| 790 |
(:method ((parse-tree (eql :modeless-end-anchor-no-newline))) |
|---|
| 791 |
;; Perl's "$\z" |
|---|
| 792 |
(declare #.*standard-optimize-settings*) |
|---|
| 793 |
(make-instance 'anchor :startp nil :no-newline-p t)) |
|---|
| 794 |
(:method ((parse-tree (eql :case-insensitive-p))) |
|---|
| 795 |
(declare #.*standard-optimize-settings*) |
|---|
| 796 |
(set-flag parse-tree) |
|---|
| 797 |
(make-instance 'void)) |
|---|
| 798 |
(:method ((parse-tree (eql :case-sensitive-p))) |
|---|
| 799 |
(declare #.*standard-optimize-settings*) |
|---|
| 800 |
(set-flag parse-tree) |
|---|
| 801 |
(make-instance 'void)) |
|---|
| 802 |
(:method ((parse-tree (eql :multi-line-mode-p))) |
|---|
| 803 |
(declare #.*standard-optimize-settings*) |
|---|
| 804 |
(set-flag parse-tree) |
|---|
| 805 |
(make-instance 'void)) |
|---|
| 806 |
(:method ((parse-tree (eql :not-multi-line-mode-p))) |
|---|
| 807 |
(declare #.*standard-optimize-settings*) |
|---|
| 808 |
(set-flag parse-tree) |
|---|
| 809 |
(make-instance 'void)) |
|---|
| 810 |
(:method ((parse-tree (eql :single-line-mode-p))) |
|---|
| 811 |
(declare #.*standard-optimize-settings*) |
|---|
| 812 |
(set-flag parse-tree) |
|---|
| 813 |
(make-instance 'void)) |
|---|
| 814 |
(:method ((parse-tree (eql :not-single-line-mode-p))) |
|---|
| 815 |
(declare #.*standard-optimize-settings*) |
|---|
| 816 |
(set-flag parse-tree) |
|---|
| 817 |
(make-instance 'void))) |
|---|
| 818 |
|
|---|
| 819 |
(defmethod convert-simple-parse-tree ((parse-tree string)) |
|---|
| 820 |
(declare #.*standard-optimize-settings*) |
|---|
| 821 |
(declare (special flags)) |
|---|
| 822 |
;; turn strings into STR objects and try to accumulate into |
|---|
| 823 |
;; STARTS-WITH |
|---|
| 824 |
(let ((str (make-instance 'str |
|---|
| 825 |
:str parse-tree |
|---|
| 826 |
:case-insensitive-p (case-insensitive-mode-p flags)))) |
|---|
| 827 |
(maybe-accumulate str) |
|---|
| 828 |
str)) |
|---|
| 829 |
|
|---|
| 830 |
(defmethod convert-simple-parse-tree ((parse-tree character)) |
|---|
| 831 |
(declare #.*standard-optimize-settings*) |
|---|
| 832 |
;; dispatch to the method for strings |
|---|
| 833 |
(convert-simple-parse-tree (string parse-tree))) |
|---|
| 834 |
|
|---|
| 835 |
(defmethod convert-simple-parse-tree (parse-tree) |
|---|
| 836 |
"The default method - check if there's a translation." |
|---|
| 837 |
(declare #.*standard-optimize-settings*) |
|---|
| 838 |
(let ((translation (and (symbolp parse-tree) (parse-tree-synonym parse-tree)))) |
|---|
| 839 |
(if translation |
|---|
| 840 |
(convert-aux (copy-tree translation)) |
|---|
| 841 |
(signal-syntax-error "Unknown token ~A in parse tree." parse-tree)))) |
|---|
| 842 |
|
|---|
| 843 |
(defun convert (parse-tree) |
|---|
| 844 |
"Converts the parse tree PARSE-TREE into an equivalent REGEX object |
|---|
| 845 |
and returns three values: the REGEX object, the number of registers |
|---|
| 846 |
seen and an object the regex starts with which is either a STR object |
|---|
| 847 |
or an EVERYTHING object \(if the regex starts with something like |
|---|
| 848 |
\".*\") or NIL." |
|---|
| 849 |
(declare #.*standard-optimize-settings*) |
|---|
| 850 |
;; this function basically just initializes the special variables |
|---|
| 851 |
;; and then calls CONVERT-AUX to do all the work |
|---|
| 852 |
(let* ((flags (list nil nil nil)) |
|---|
| 853 |
(reg-num 0) |
|---|
| 854 |
reg-names |
|---|
| 855 |
named-reg-seen |
|---|
| 856 |
(accumulate-start-p t) |
|---|
| 857 |
starts-with |
|---|
| 858 |
(max-back-ref 0) |
|---|
| 859 |
(converted-parse-tree (convert-aux parse-tree))) |
|---|
| 860 |
(declare (special flags reg-num reg-names named-reg-seen |
|---|
| 861 |
accumulate-start-p starts-with max-back-ref)) |
|---|
| 862 |
;; make sure we don't reference registers which aren't there |
|---|
| 863 |
(when (> (the fixnum max-back-ref) |
|---|
| 864 |
(the fixnum reg-num)) |
|---|
| 865 |
(signal-syntax-error "Backreference to register ~A which has not been defined." max-back-ref)) |
|---|
| 866 |
(when (typep starts-with 'str) |
|---|
| 867 |
(setf (slot-value starts-with 'str) |
|---|
| 868 |
(coerce (slot-value starts-with 'str) |
|---|
| 869 |
#+:lispworks 'lw:simple-text-string |
|---|
| 870 |
#-:lispworks 'simple-string))) |
|---|
| 871 |
(values converted-parse-tree reg-num starts-with |
|---|
| 872 |
;; we can't simply use *ALLOW-NAMED-REGISTERS* |
|---|
| 873 |
;; since parse-tree syntax ignores it |
|---|
| 874 |
(when named-reg-seen |
|---|
| 875 |
(nreverse reg-names))))) |
|---|