| 1 |
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*- |
|---|
| 2 |
;;; $Header: /usr/local/cvsrep/cl-ppcre/scanner.lisp,v 1.35 2008/07/23 22:25:15 edi Exp $ |
|---|
| 3 |
|
|---|
| 4 |
;;; Here the scanner for the actual regex as well as utility scanners |
|---|
| 5 |
;;; for the constant start and end strings are created. |
|---|
| 6 |
|
|---|
| 7 |
;;; Copyright (c) 2002-2008, Dr. Edmund Weitz. All rights reserved. |
|---|
| 8 |
|
|---|
| 9 |
;;; Redistribution and use in source and binary forms, with or without |
|---|
| 10 |
;;; modification, are permitted provided that the following conditions |
|---|
| 11 |
;;; are met: |
|---|
| 12 |
|
|---|
| 13 |
;;; * Redistributions of source code must retain the above copyright |
|---|
| 14 |
;;; notice, this list of conditions and the following disclaimer. |
|---|
| 15 |
|
|---|
| 16 |
;;; * Redistributions in binary form must reproduce the above |
|---|
| 17 |
;;; copyright notice, this list of conditions and the following |
|---|
| 18 |
;;; disclaimer in the documentation and/or other materials |
|---|
| 19 |
;;; provided with the distribution. |
|---|
| 20 |
|
|---|
| 21 |
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED |
|---|
| 22 |
;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
|---|
| 23 |
;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|---|
| 24 |
;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY |
|---|
| 25 |
;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
|---|
| 26 |
;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE |
|---|
| 27 |
;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
|---|
| 28 |
;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, |
|---|
| 29 |
;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
|---|
| 30 |
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
|---|
| 31 |
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|---|
| 32 |
|
|---|
| 33 |
(in-package :cl-ppcre) |
|---|
| 34 |
|
|---|
| 35 |
(defmacro bmh-matcher-aux (&key case-insensitive-p) |
|---|
| 36 |
"Auxiliary macro used by CREATE-BMH-MATCHER." |
|---|
| 37 |
(let ((char-compare (if case-insensitive-p 'char-equal 'char=))) |
|---|
| 38 |
`(lambda (start-pos) |
|---|
| 39 |
(declare (fixnum start-pos)) |
|---|
| 40 |
(if (or (minusp start-pos) |
|---|
| 41 |
(> (the fixnum (+ start-pos m)) *end-pos*)) |
|---|
| 42 |
nil |
|---|
| 43 |
(loop named bmh-matcher |
|---|
| 44 |
for k of-type fixnum = (+ start-pos m -1) |
|---|
| 45 |
then (+ k (max 1 (aref skip (char-code (schar *string* k))))) |
|---|
| 46 |
while (< k *end-pos*) |
|---|
| 47 |
do (loop for j of-type fixnum downfrom (1- m) |
|---|
| 48 |
for i of-type fixnum downfrom k |
|---|
| 49 |
while (and (>= j 0) |
|---|
| 50 |
(,char-compare (schar *string* i) |
|---|
| 51 |
(schar pattern j))) |
|---|
| 52 |
finally (if (minusp j) |
|---|
| 53 |
(return-from bmh-matcher (1+ i))))))))) |
|---|
| 54 |
|
|---|
| 55 |
(defun create-bmh-matcher (pattern case-insensitive-p) |
|---|
| 56 |
"Returns a Boyer-Moore-Horspool matcher which searches the (special) |
|---|
| 57 |
simple-string *STRING* for the first occurence of the substring |
|---|
| 58 |
PATTERN. The search starts at the position START-POS within *STRING* |
|---|
| 59 |
and stops before *END-POS* is reached. Depending on the second |
|---|
| 60 |
argument the search is case-insensitive or not. If the special |
|---|
| 61 |
variable *USE-BMH-MATCHERS* is NIL, use the standard SEARCH function |
|---|
| 62 |
instead. \(BMH matchers are faster but need much more space.)" |
|---|
| 63 |
(declare #.*standard-optimize-settings*) |
|---|
| 64 |
;; see <http://www-igm.univ-mlv.fr/~lecroq/string/node18.html> for |
|---|
| 65 |
;; details |
|---|
| 66 |
(unless *use-bmh-matchers* |
|---|
| 67 |
(let ((test (if case-insensitive-p #'char-equal #'char=))) |
|---|
| 68 |
(return-from create-bmh-matcher |
|---|
| 69 |
(lambda (start-pos) |
|---|
| 70 |
(declare (fixnum start-pos)) |
|---|
| 71 |
(and (not (minusp start-pos)) |
|---|
| 72 |
(search pattern |
|---|
| 73 |
*string* |
|---|
| 74 |
:start2 start-pos |
|---|
| 75 |
:end2 *end-pos* |
|---|
| 76 |
:test test)))))) |
|---|
| 77 |
(let* ((m (length pattern)) |
|---|
| 78 |
(skip (make-array *regex-char-code-limit* |
|---|
| 79 |
:element-type 'fixnum |
|---|
| 80 |
:initial-element m))) |
|---|
| 81 |
(declare (fixnum m)) |
|---|
| 82 |
(loop for k of-type fixnum below m |
|---|
| 83 |
if case-insensitive-p |
|---|
| 84 |
do (setf (aref skip (char-code (char-upcase (schar pattern k)))) (- m k 1) |
|---|
| 85 |
(aref skip (char-code (char-downcase (schar pattern k)))) (- m k 1)) |
|---|
| 86 |
else |
|---|
| 87 |
do (setf (aref skip (char-code (schar pattern k))) (- m k 1))) |
|---|
| 88 |
(if case-insensitive-p |
|---|
| 89 |
(bmh-matcher-aux :case-insensitive-p t) |
|---|
| 90 |
(bmh-matcher-aux)))) |
|---|
| 91 |
|
|---|
| 92 |
(defmacro char-searcher-aux (&key case-insensitive-p) |
|---|
| 93 |
"Auxiliary macro used by CREATE-CHAR-SEARCHER." |
|---|
| 94 |
(let ((char-compare (if case-insensitive-p 'char-equal 'char=))) |
|---|
| 95 |
`(lambda (start-pos) |
|---|
| 96 |
(declare (fixnum start-pos)) |
|---|
| 97 |
(and (not (minusp start-pos)) |
|---|
| 98 |
(loop for i of-type fixnum from start-pos below *end-pos* |
|---|
| 99 |
thereis (and (,char-compare (schar *string* i) chr) i)))))) |
|---|
| 100 |
|
|---|
| 101 |
(defun create-char-searcher (chr case-insensitive-p) |
|---|
| 102 |
"Returns a function which searches the (special) simple-string |
|---|
| 103 |
*STRING* for the first occurence of the character CHR. The search |
|---|
| 104 |
starts at the position START-POS within *STRING* and stops before |
|---|
| 105 |
*END-POS* is reached. Depending on the second argument the search is |
|---|
| 106 |
case-insensitive or not." |
|---|
| 107 |
(declare #.*standard-optimize-settings*) |
|---|
| 108 |
(if case-insensitive-p |
|---|
| 109 |
(char-searcher-aux :case-insensitive-p t) |
|---|
| 110 |
(char-searcher-aux))) |
|---|
| 111 |
|
|---|
| 112 |
(declaim (inline newline-skipper)) |
|---|
| 113 |
(defun newline-skipper (start-pos) |
|---|
| 114 |
"Finds the next occurence of a character in *STRING* which is behind |
|---|
| 115 |
a #\Newline." |
|---|
| 116 |
(declare #.*standard-optimize-settings*) |
|---|
| 117 |
(declare (fixnum start-pos)) |
|---|
| 118 |
;; we can start with (1- START-POS) without testing for (PLUSP |
|---|
| 119 |
;; START-POS) because we know we'll never call NEWLINE-SKIPPER on |
|---|
| 120 |
;; the first iteration |
|---|
| 121 |
(loop for i of-type fixnum from (1- start-pos) below *end-pos* |
|---|
| 122 |
thereis (and (char= (schar *string* i) |
|---|
| 123 |
#\Newline) |
|---|
| 124 |
(1+ i)))) |
|---|
| 125 |
|
|---|
| 126 |
(defmacro insert-advance-fn (advance-fn) |
|---|
| 127 |
"Creates the actual closure returned by CREATE-SCANNER-AUX by |
|---|
| 128 |
replacing '(ADVANCE-FN-DEFINITION) with a suitable definition for |
|---|
| 129 |
ADVANCE-FN. This is a utility macro used by CREATE-SCANNER-AUX." |
|---|
| 130 |
(subst |
|---|
| 131 |
advance-fn '(advance-fn-definition) |
|---|
| 132 |
'(lambda (string start end) |
|---|
| 133 |
(block scan |
|---|
| 134 |
;; initialize a couple of special variables used by the |
|---|
| 135 |
;; matchers (see file specials.lisp) |
|---|
| 136 |
(let* ((*string* string) |
|---|
| 137 |
(*start-pos* start) |
|---|
| 138 |
(*end-pos* end) |
|---|
| 139 |
;; we will search forward for END-STRING if this value |
|---|
| 140 |
;; isn't at least as big as POS (see ADVANCE-FN), so it |
|---|
| 141 |
;; is safe to start to the left of *START-POS*; note |
|---|
| 142 |
;; that this value will _never_ be decremented - this |
|---|
| 143 |
;; is crucial to the scanning process |
|---|
| 144 |
(*end-string-pos* (1- *start-pos*)) |
|---|
| 145 |
;; the next five will shadow the variables defined by |
|---|
| 146 |
;; DEFPARAMETER; at this point, we don't know if we'll |
|---|
| 147 |
;; actually use them, though |
|---|
| 148 |
(*repeat-counters* *repeat-counters*) |
|---|
| 149 |
(*last-pos-stores* *last-pos-stores*) |
|---|
| 150 |
(*reg-starts* *reg-starts*) |
|---|
| 151 |
(*regs-maybe-start* *regs-maybe-start*) |
|---|
| 152 |
(*reg-ends* *reg-ends*) |
|---|
| 153 |
;; we might be able to optimize the scanning process by |
|---|
| 154 |
;; (virtually) shifting *START-POS* to the right |
|---|
| 155 |
(scan-start-pos *start-pos*) |
|---|
| 156 |
(starts-with-str (if start-string-test |
|---|
| 157 |
(str starts-with) |
|---|
| 158 |
nil)) |
|---|
| 159 |
;; we don't need to try further than MAX-END-POS |
|---|
| 160 |
(max-end-pos (- *end-pos* min-len))) |
|---|
| 161 |
(declare (fixnum scan-start-pos) |
|---|
| 162 |
(function match-fn)) |
|---|
| 163 |
;; definition of ADVANCE-FN will be inserted here by macrology |
|---|
| 164 |
(labels ((advance-fn-definition)) |
|---|
| 165 |
(declare (inline advance-fn)) |
|---|
| 166 |
(when (plusp rep-num) |
|---|
| 167 |
;; we have at least one REPETITION which needs to count |
|---|
| 168 |
;; the number of repetitions |
|---|
| 169 |
(setq *repeat-counters* (make-array rep-num |
|---|
| 170 |
:initial-element 0 |
|---|
| 171 |
:element-type 'fixnum))) |
|---|
| 172 |
(when (plusp zero-length-num) |
|---|
| 173 |
;; we have at least one REPETITION which needs to watch |
|---|
| 174 |
;; out for zero-length repetitions |
|---|
| 175 |
(setq *last-pos-stores* (make-array zero-length-num |
|---|
| 176 |
:initial-element nil))) |
|---|
| 177 |
(when (plusp reg-num) |
|---|
| 178 |
;; we have registers in our regular expression |
|---|
| 179 |
(setq *reg-starts* (make-array reg-num :initial-element nil) |
|---|
| 180 |
*regs-maybe-start* (make-array reg-num :initial-element nil) |
|---|
| 181 |
*reg-ends* (make-array reg-num :initial-element nil))) |
|---|
| 182 |
(when end-anchored-p |
|---|
| 183 |
;; the regular expression has a constant end string which |
|---|
| 184 |
;; is anchored at the very end of the target string |
|---|
| 185 |
;; (perhaps modulo a #\Newline) |
|---|
| 186 |
(let ((end-test-pos (- *end-pos* (the fixnum end-string-len)))) |
|---|
| 187 |
(declare (fixnum end-test-pos) |
|---|
| 188 |
(function end-string-test)) |
|---|
| 189 |
(unless (setq *end-string-pos* (funcall end-string-test |
|---|
| 190 |
end-test-pos)) |
|---|
| 191 |
(when (and (= 1 (the fixnum end-anchored-p)) |
|---|
| 192 |
(> *end-pos* scan-start-pos) |
|---|
| 193 |
(char= #\Newline (schar *string* (1- *end-pos*)))) |
|---|
| 194 |
;; if we didn't find an end string candidate from |
|---|
| 195 |
;; END-TEST-POS and if a #\Newline at the end is |
|---|
| 196 |
;; allowed we try it again from one position to the |
|---|
| 197 |
;; left |
|---|
| 198 |
(setq *end-string-pos* (funcall end-string-test |
|---|
| 199 |
(1- end-test-pos)))))) |
|---|
| 200 |
(unless (and *end-string-pos* |
|---|
| 201 |
(<= *start-pos* *end-string-pos*)) |
|---|
| 202 |
;; no end string candidate found, so give up |
|---|
| 203 |
(return-from scan nil)) |
|---|
| 204 |
(when end-string-offset |
|---|
| 205 |
;; if the offset of the constant end string from the |
|---|
| 206 |
;; left of the regular expression is known we can start |
|---|
| 207 |
;; scanning further to the right; this is similar to |
|---|
| 208 |
;; what we might do in ADVANCE-FN |
|---|
| 209 |
(setq scan-start-pos (max scan-start-pos |
|---|
| 210 |
(- (the fixnum *end-string-pos*) |
|---|
| 211 |
(the fixnum end-string-offset)))))) |
|---|
| 212 |
(cond |
|---|
| 213 |
(start-anchored-p |
|---|
| 214 |
;; we're anchored at the start of the target string, |
|---|
| 215 |
;; so no need to try again after first failure |
|---|
| 216 |
(when (or (/= *start-pos* scan-start-pos) |
|---|
| 217 |
(< max-end-pos *start-pos*)) |
|---|
| 218 |
;; if END-STRING-OFFSET has proven that we don't |
|---|
| 219 |
;; need to bother to scan from *START-POS* or if the |
|---|
| 220 |
;; minimal length of the regular expression is |
|---|
| 221 |
;; longer than the target string we give up |
|---|
| 222 |
(return-from scan nil)) |
|---|
| 223 |
(when starts-with-str |
|---|
| 224 |
(locally |
|---|
| 225 |
(declare (fixnum starts-with-len)) |
|---|
| 226 |
(cond ((and (case-insensitive-p starts-with) |
|---|
| 227 |
(not (*string*-equal starts-with-str |
|---|
| 228 |
*start-pos* |
|---|
| 229 |
(+ *start-pos* |
|---|
| 230 |
starts-with-len) |
|---|
| 231 |
0 starts-with-len))) |
|---|
| 232 |
;; the regular expression has a |
|---|
| 233 |
;; case-insensitive constant start string |
|---|
| 234 |
;; and we didn't find it |
|---|
| 235 |
(return-from scan nil)) |
|---|
| 236 |
((and (not (case-insensitive-p starts-with)) |
|---|
| 237 |
(not (*string*= starts-with-str |
|---|
| 238 |
*start-pos* |
|---|
| 239 |
(+ *start-pos* starts-with-len) |
|---|
| 240 |
0 starts-with-len))) |
|---|
| 241 |
;; the regular expression has a |
|---|
| 242 |
;; case-sensitive constant start string |
|---|
| 243 |
;; and we didn't find it |
|---|
| 244 |
(return-from scan nil)) |
|---|
| 245 |
(t nil)))) |
|---|
| 246 |
(when (and end-string-test |
|---|
| 247 |
(not end-anchored-p)) |
|---|
| 248 |
;; the regular expression has a constant end string |
|---|
| 249 |
;; which isn't anchored so we didn't check for it |
|---|
| 250 |
;; already |
|---|
| 251 |
(block end-string-loop |
|---|
| 252 |
;; we temporarily use *END-STRING-POS* as our |
|---|
| 253 |
;; starting position to look for end string |
|---|
| 254 |
;; candidates |
|---|
| 255 |
(setq *end-string-pos* *start-pos*) |
|---|
| 256 |
(loop |
|---|
| 257 |
(unless (setq *end-string-pos* |
|---|
| 258 |
(funcall (the function end-string-test) |
|---|
| 259 |
*end-string-pos*)) |
|---|
| 260 |
;; no end string candidate found, so give up |
|---|
| 261 |
(return-from scan nil)) |
|---|
| 262 |
(unless end-string-offset |
|---|
| 263 |
;; end string doesn't have an offset so we |
|---|
| 264 |
;; can start scanning now |
|---|
| 265 |
(return-from end-string-loop)) |
|---|
| 266 |
(let ((maybe-start-pos (- (the fixnum *end-string-pos*) |
|---|
| 267 |
(the fixnum end-string-offset)))) |
|---|
| 268 |
(cond ((= maybe-start-pos *start-pos*) |
|---|
| 269 |
;; offset of end string into regular |
|---|
| 270 |
;; expression matches start anchor - |
|---|
| 271 |
;; fine... |
|---|
| 272 |
(return-from end-string-loop)) |
|---|
| 273 |
((and (< maybe-start-pos *start-pos*) |
|---|
| 274 |
(< (+ *end-string-pos* end-string-len) *end-pos*)) |
|---|
| 275 |
;; no match but maybe we find another |
|---|
| 276 |
;; one to the right - try again |
|---|
| 277 |
(incf *end-string-pos*)) |
|---|
| 278 |
(t |
|---|
| 279 |
;; otherwise give up |
|---|
| 280 |
(return-from scan nil))))))) |
|---|
| 281 |
;; if we got here we scan exactly once |
|---|
| 282 |
(let ((next-pos (funcall match-fn *start-pos*))) |
|---|
| 283 |
(when next-pos |
|---|
| 284 |
(values (if next-pos *start-pos* nil) |
|---|
| 285 |
next-pos |
|---|
| 286 |
*reg-starts* |
|---|
| 287 |
*reg-ends*)))) |
|---|
| 288 |
(t |
|---|
| 289 |
(loop for pos = (if starts-with-everything |
|---|
| 290 |
;; don't jump to the next |
|---|
| 291 |
;; #\Newline on the first |
|---|
| 292 |
;; iteration |
|---|
| 293 |
scan-start-pos |
|---|
| 294 |
(advance-fn scan-start-pos)) |
|---|
| 295 |
then (advance-fn pos) |
|---|
| 296 |
;; give up if the regular expression can't fit |
|---|
| 297 |
;; into the rest of the target string |
|---|
| 298 |
while (and pos |
|---|
| 299 |
(<= (the fixnum pos) max-end-pos)) |
|---|
| 300 |
do (let ((next-pos (funcall match-fn pos))) |
|---|
| 301 |
(when next-pos |
|---|
| 302 |
(return-from scan (values pos |
|---|
| 303 |
next-pos |
|---|
| 304 |
*reg-starts* |
|---|
| 305 |
*reg-ends*))) |
|---|
| 306 |
;; not yet found, increment POS |
|---|
| 307 |
#-cormanlisp (incf (the fixnum pos)) |
|---|
| 308 |
#+cormanlisp (incf pos))))))))) |
|---|
| 309 |
:test #'equalp)) |
|---|
| 310 |
|
|---|
| 311 |
(defun create-scanner-aux (match-fn |
|---|
| 312 |
min-len |
|---|
| 313 |
start-anchored-p |
|---|
| 314 |
starts-with |
|---|
| 315 |
start-string-test |
|---|
| 316 |
end-anchored-p |
|---|
| 317 |
end-string-test |
|---|
| 318 |
end-string-len |
|---|
| 319 |
end-string-offset |
|---|
| 320 |
rep-num |
|---|
| 321 |
zero-length-num |
|---|
| 322 |
reg-num) |
|---|
| 323 |
"Auxiliary function to create and return a scanner \(which is |
|---|
| 324 |
actually a closure). Used by CREATE-SCANNER." |
|---|
| 325 |
(declare #.*standard-optimize-settings*) |
|---|
| 326 |
(declare (fixnum min-len zero-length-num rep-num reg-num)) |
|---|
| 327 |
(let ((starts-with-len (if (typep starts-with 'str) |
|---|
| 328 |
(len starts-with))) |
|---|
| 329 |
(starts-with-everything (typep starts-with 'everything))) |
|---|
| 330 |
(cond |
|---|
| 331 |
;; this COND statement dispatches on the different versions we |
|---|
| 332 |
;; have for ADVANCE-FN and creates different closures for each; |
|---|
| 333 |
;; note that you see only the bodies of ADVANCE-FN below - the |
|---|
| 334 |
;; actual scanner is defined in INSERT-ADVANCE-FN above; (we |
|---|
| 335 |
;; could have done this with closures instead of macrology but |
|---|
| 336 |
;; would have consed a lot more) |
|---|
| 337 |
((and start-string-test end-string-test end-string-offset) |
|---|
| 338 |
;; we know that the regular expression has constant start and |
|---|
| 339 |
;; end strings and we know the end string's offset (from the |
|---|
| 340 |
;; left) |
|---|
| 341 |
(insert-advance-fn |
|---|
| 342 |
(advance-fn (pos) |
|---|
| 343 |
(declare (fixnum end-string-offset starts-with-len) |
|---|
| 344 |
(function start-string-test end-string-test)) |
|---|
| 345 |
(loop |
|---|
| 346 |
(unless (setq pos (funcall start-string-test pos)) |
|---|
| 347 |
;; give up completely if we can't find a start string |
|---|
| 348 |
;; candidate |
|---|
| 349 |
(return-from scan nil)) |
|---|
| 350 |
(locally |
|---|
| 351 |
;; from here we know that POS is a FIXNUM |
|---|
| 352 |
(declare (fixnum pos)) |
|---|
| 353 |
(when (= pos (- (the fixnum *end-string-pos*) end-string-offset)) |
|---|
| 354 |
;; if we already found an end string candidate the |
|---|
| 355 |
;; position of which matches the start string |
|---|
| 356 |
;; candidate we're done |
|---|
| 357 |
(return-from advance-fn pos)) |
|---|
| 358 |
(let ((try-pos (+ pos starts-with-len))) |
|---|
| 359 |
;; otherwise try (again) to find an end string |
|---|
| 360 |
;; candidate which starts behind the start string |
|---|
| 361 |
;; candidate |
|---|
| 362 |
(loop |
|---|
| 363 |
(unless (setq *end-string-pos* |
|---|
| 364 |
(funcall end-string-test try-pos)) |
|---|
| 365 |
;; no end string candidate found, so give up |
|---|
| 366 |
(return-from scan nil)) |
|---|
| 367 |
;; NEW-POS is where we should start scanning |
|---|
| 368 |
;; according to the end string candidate |
|---|
| 369 |
(let ((new-pos (- (the fixnum *end-string-pos*) |
|---|
| 370 |
end-string-offset))) |
|---|
| 371 |
(declare (fixnum new-pos *end-string-pos*)) |
|---|
| 372 |
(cond ((= new-pos pos) |
|---|
| 373 |
;; if POS and NEW-POS are equal then the |
|---|
| 374 |
;; two candidates agree so we're fine |
|---|
| 375 |
(return-from advance-fn pos)) |
|---|
| 376 |
((> new-pos pos) |
|---|
| 377 |
;; if NEW-POS is further to the right we |
|---|
| 378 |
;; advance POS and try again, i.e. we go |
|---|
| 379 |
;; back to the start of the outer LOOP |
|---|
| 380 |
(setq pos new-pos) |
|---|
| 381 |
;; this means "return from inner LOOP" |
|---|
| 382 |
(return)) |
|---|
| 383 |
(t |
|---|
| 384 |
;; otherwise NEW-POS is smaller than POS, |
|---|
| 385 |
;; so we have to redo the inner LOOP to |
|---|
| 386 |
;; find another end string candidate |
|---|
| 387 |
;; further to the right |
|---|
| 388 |
(setq try-pos (1+ *end-string-pos*)))))))))))) |
|---|
| 389 |
((and starts-with-everything end-string-test end-string-offset) |
|---|
| 390 |
;; we know that the regular expression starts with ".*" (which |
|---|
| 391 |
;; is not in single-line-mode, see CREATE-SCANNER-AUX) and ends |
|---|
| 392 |
;; with a constant end string and we know the end string's |
|---|
| 393 |
;; offset (from the left) |
|---|
| 394 |
(insert-advance-fn |
|---|
| 395 |
(advance-fn (pos) |
|---|
| 396 |
(declare (fixnum end-string-offset) |
|---|
| 397 |
(function end-string-test)) |
|---|
| 398 |
(loop |
|---|
| 399 |
(unless (setq pos (newline-skipper pos)) |
|---|
| 400 |
;; if we can't find a #\Newline we give up immediately |
|---|
| 401 |
(return-from scan nil)) |
|---|
| 402 |
(locally |
|---|
| 403 |
;; from here we know that POS is a FIXNUM |
|---|
| 404 |
(declare (fixnum pos)) |
|---|
| 405 |
(when (= pos (- (the fixnum *end-string-pos*) end-string-offset)) |
|---|
| 406 |
;; if we already found an end string candidate the |
|---|
| 407 |
;; position of which matches the place behind the |
|---|
| 408 |
;; #\Newline we're done |
|---|
| 409 |
(return-from advance-fn pos)) |
|---|
| 410 |
(let ((try-pos pos)) |
|---|
| 411 |
;; otherwise try (again) to find an end string |
|---|
| 412 |
;; candidate which starts behind the #\Newline |
|---|
| 413 |
(loop |
|---|
| 414 |
(unless (setq *end-string-pos* |
|---|
| 415 |
(funcall end-string-test try-pos)) |
|---|
| 416 |
;; no end string candidate found, so we give up |
|---|
| 417 |
(return-from scan nil)) |
|---|
| 418 |
;; NEW-POS is where we should start scanning |
|---|
| 419 |
;; according to the end string candidate |
|---|
| 420 |
(let ((new-pos (- (the fixnum *end-string-pos*) |
|---|
| 421 |
end-string-offset))) |
|---|
| 422 |
(declare (fixnum new-pos *end-string-pos*)) |
|---|
| 423 |
(cond ((= new-pos pos) |
|---|
| 424 |
;; if POS and NEW-POS are equal then the |
|---|
| 425 |
;; the end string candidate agrees with |
|---|
| 426 |
;; the #\Newline so we're fine |
|---|
| 427 |
(return-from advance-fn pos)) |
|---|
| 428 |
((> new-pos pos) |
|---|
| 429 |
;; if NEW-POS is further to the right we |
|---|
| 430 |
;; advance POS and try again, i.e. we go |
|---|
| 431 |
;; back to the start of the outer LOOP |
|---|
| 432 |
(setq pos new-pos) |
|---|
| 433 |
;; this means "return from inner LOOP" |
|---|
| 434 |
(return)) |
|---|
| 435 |
(t |
|---|
| 436 |
;; otherwise NEW-POS is smaller than POS, |
|---|
| 437 |
;; so we have to redo the inner LOOP to |
|---|
| 438 |
;; find another end string candidate |
|---|
| 439 |
;; further to the right |
|---|
| 440 |
(setq try-pos (1+ *end-string-pos*)))))))))))) |
|---|
| 441 |
((and start-string-test end-string-test) |
|---|
| 442 |
;; we know that the regular expression has constant start and |
|---|
| 443 |
;; end strings; similar to the first case but we only need to |
|---|
| 444 |
;; check for the end string, it doesn't provide enough |
|---|
| 445 |
;; information to advance POS |
|---|
| 446 |
(insert-advance-fn |
|---|
| 447 |
(advance-fn (pos) |
|---|
| 448 |
(declare (function start-string-test end-string-test)) |
|---|
| 449 |
(unless (setq pos (funcall start-string-test pos)) |
|---|
| 450 |
(return-from scan nil)) |
|---|
| 451 |
(if (<= (the fixnum pos) |
|---|
| 452 |
(the fixnum *end-string-pos*)) |
|---|
| 453 |
(return-from advance-fn pos)) |
|---|
| 454 |
(unless (setq *end-string-pos* (funcall end-string-test pos)) |
|---|
| 455 |
(return-from scan nil)) |
|---|
| 456 |
pos))) |
|---|
| 457 |
((and starts-with-everything end-string-test) |
|---|
| 458 |
;; we know that the regular expression starts with ".*" (which |
|---|
| 459 |
;; is not in single-line-mode, see CREATE-SCANNER-AUX) and ends |
|---|
| 460 |
;; with a constant end string; similar to the second case but we |
|---|
| 461 |
;; only need to check for the end string, it doesn't provide |
|---|
| 462 |
;; enough information to advance POS |
|---|
| 463 |
(insert-advance-fn |
|---|
| 464 |
(advance-fn (pos) |
|---|
| 465 |
(declare (function end-string-test)) |
|---|
| 466 |
(unless (setq pos (newline-skipper pos)) |
|---|
| 467 |
(return-from scan nil)) |
|---|
| 468 |
(if (<= (the fixnum pos) |
|---|
| 469 |
(the fixnum *end-string-pos*)) |
|---|
| 470 |
(return-from advance-fn pos)) |
|---|
| 471 |
(unless (setq *end-string-pos* (funcall end-string-test pos)) |
|---|
| 472 |
(return-from scan nil)) |
|---|
| 473 |
pos))) |
|---|
| 474 |
(start-string-test |
|---|
| 475 |
;; just check for constant start string candidate |
|---|
| 476 |
(insert-advance-fn |
|---|
| 477 |
(advance-fn (pos) |
|---|
| 478 |
(declare (function start-string-test)) |
|---|
| 479 |
(unless (setq pos (funcall start-string-test pos)) |
|---|
| 480 |
(return-from scan nil)) |
|---|
| 481 |
pos))) |
|---|
| 482 |
(starts-with-everything |
|---|
| 483 |
;; just advance POS with NEWLINE-SKIPPER |
|---|
| 484 |
(insert-advance-fn |
|---|
| 485 |
(advance-fn (pos) |
|---|
| 486 |
(unless (setq pos (newline-skipper pos)) |
|---|
| 487 |
(return-from scan nil)) |
|---|
| 488 |
pos))) |
|---|
| 489 |
(end-string-test |
|---|
| 490 |
;; just check for the next end string candidate if POS has |
|---|
| 491 |
;; advanced beyond the last one |
|---|
| 492 |
(insert-advance-fn |
|---|
| 493 |
(advance-fn (pos) |
|---|
| 494 |
(declare (function end-string-test)) |
|---|
| 495 |
(if (<= (the fixnum pos) |
|---|
| 496 |
(the fixnum *end-string-pos*)) |
|---|
| 497 |
(return-from advance-fn pos)) |
|---|
| 498 |
(unless (setq *end-string-pos* (funcall end-string-test pos)) |
|---|
| 499 |
(return-from scan nil)) |
|---|
| 500 |
pos))) |
|---|
| 501 |
(t |
|---|
| 502 |
;; not enough optimization information about the regular |
|---|
| 503 |
;; expression to optimize so we just return POS |
|---|
| 504 |
(insert-advance-fn |
|---|
| 505 |
(advance-fn (pos) |
|---|
| 506 |
pos)))))) |
|---|