| 1 |
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-INTERPOL; Base: 10 -*- |
|---|
| 2 |
;;; $Header: /usr/local/cvsrep/cl-interpol/read.lisp,v 1.31 2008/07/23 15:13:08 edi Exp $ |
|---|
| 3 |
|
|---|
| 4 |
;;; Copyright (c) 2003-2008, Dr. Edmund Weitz. All rights reserved. |
|---|
| 5 |
|
|---|
| 6 |
;;; Redistribution and use in source and binary forms, with or without |
|---|
| 7 |
;;; modification, are permitted provided that the following conditions |
|---|
| 8 |
;;; are met: |
|---|
| 9 |
|
|---|
| 10 |
;;; * Redistributions of source code must retain the above copyright |
|---|
| 11 |
;;; notice, this list of conditions and the following disclaimer. |
|---|
| 12 |
|
|---|
| 13 |
;;; * Redistributions in binary form must reproduce the above |
|---|
| 14 |
;;; copyright notice, this list of conditions and the following |
|---|
| 15 |
;;; disclaimer in the documentation and/or other materials |
|---|
| 16 |
;;; provided with the distribution. |
|---|
| 17 |
|
|---|
| 18 |
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED |
|---|
| 19 |
;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
|---|
| 20 |
;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|---|
| 21 |
;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY |
|---|
| 22 |
;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
|---|
| 23 |
;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE |
|---|
| 24 |
;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
|---|
| 25 |
;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, |
|---|
| 26 |
;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
|---|
| 27 |
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
|---|
| 28 |
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|---|
| 29 |
|
|---|
| 30 |
(in-package :cl-interpol) |
|---|
| 31 |
|
|---|
| 32 |
(defun read-while (predicate &key max) |
|---|
| 33 |
"Reads characters from *STREAM* while PREDICATE returns a true value |
|---|
| 34 |
for each character. Returns at most MAX characters if MAX is true." |
|---|
| 35 |
(when (eql max 0) |
|---|
| 36 |
(return-from read-while "")) |
|---|
| 37 |
(let ((collector (make-collector))) |
|---|
| 38 |
(loop for count of-type fixnum from 1 |
|---|
| 39 |
for c = (peek-char*) |
|---|
| 40 |
while (and (or (not max) |
|---|
| 41 |
(<= count max)) |
|---|
| 42 |
c |
|---|
| 43 |
(funcall predicate c)) |
|---|
| 44 |
do (vector-push-extend (read-char*) collector) |
|---|
| 45 |
finally (return collector)))) |
|---|
| 46 |
|
|---|
| 47 |
(declaim (inline get-number)) |
|---|
| 48 |
(defun get-number (&key (radix 10) max) |
|---|
| 49 |
"Reads and consumes the number *STREAM* is currently looking at and |
|---|
| 50 |
returns it. Returns NIL if no number could be identified. RADIX is |
|---|
| 51 |
used as in PARSE-INTEGER. If MAX is not NIL we'll read at most the |
|---|
| 52 |
next MAX characters." |
|---|
| 53 |
(parse-integer (read-while (lambda (c) |
|---|
| 54 |
(digit-char-p c radix)) |
|---|
| 55 |
:max max) |
|---|
| 56 |
:radix radix |
|---|
| 57 |
:junk-allowed t)) |
|---|
| 58 |
|
|---|
| 59 |
(defun resolve-unicode-name (name) |
|---|
| 60 |
"Tries to return a character which was encoded as \\N<NAME>." |
|---|
| 61 |
(or (character-named name) |
|---|
| 62 |
(gethash (canonicalize-name name) *unicode-aliases*))) |
|---|
| 63 |
|
|---|
| 64 |
(defun get-char-from-unicode-name () |
|---|
| 65 |
"Parses and returns a named character after \"\\N\" has already been |
|---|
| 66 |
read. This function reads from *STREAM*." |
|---|
| 67 |
(let ((next-char (read-char*))) |
|---|
| 68 |
(unless (char= next-char #\{) |
|---|
| 69 |
(signal-reader-error "Expected { after \\N")) |
|---|
| 70 |
(let ((name (read-while (lambda (c) |
|---|
| 71 |
(and (char/= c #\}) |
|---|
| 72 |
(char/= c *term-char*)))))) |
|---|
| 73 |
(let ((next-char (read-char*))) |
|---|
| 74 |
(unless (char= next-char #\}) |
|---|
| 75 |
(signal-reader-error "Expected } after Unicode character name"))) |
|---|
| 76 |
(or (resolve-unicode-name name) |
|---|
| 77 |
(signal-reader-error "Could not find character with name '~A'" |
|---|
| 78 |
name))))) |
|---|
| 79 |
|
|---|
| 80 |
(defun unescape-char (regex-mode) |
|---|
| 81 |
"Convert the characters(s) on *STREAM* following a backslash into a |
|---|
| 82 |
character which is returned. This function is to be called when the |
|---|
| 83 |
backslash has already been consumed." |
|---|
| 84 |
(let ((chr (read-char*))) |
|---|
| 85 |
;; certain escape sequences are left as is when in regex mode |
|---|
| 86 |
(when (or (and (eq regex-mode :in-char-class) |
|---|
| 87 |
(find chr "pPwWsSdD" :test #'char=)) |
|---|
| 88 |
(and (eq regex-mode t) |
|---|
| 89 |
(find chr "kpPwWsSdDbBAZz" :test #'char=))) |
|---|
| 90 |
(return-from unescape-char |
|---|
| 91 |
(concatenate 'string "\\" (string chr)))) |
|---|
| 92 |
(let ((result |
|---|
| 93 |
(case chr |
|---|
| 94 |
((#\N) |
|---|
| 95 |
;; named Unicode chars |
|---|
| 96 |
(get-char-from-unicode-name)) |
|---|
| 97 |
((#\c) |
|---|
| 98 |
;; \cx means control-x |
|---|
| 99 |
(when (char= (peek-char*) *term-char*) |
|---|
| 100 |
(signal-reader-error "String ended after \\c")) |
|---|
| 101 |
(code-char (logxor #x40 |
|---|
| 102 |
(char-code (char-upcase (read-char*)))))) |
|---|
| 103 |
((#\x) |
|---|
| 104 |
(cond ((char= (peek-char*) #\{) |
|---|
| 105 |
;; "wide" hex char, i.e. hexadecimal number is |
|---|
| 106 |
;; enclosed in curly brackets |
|---|
| 107 |
(read-char*) |
|---|
| 108 |
(prog1 |
|---|
| 109 |
(let ((code (or (get-number :radix 16) |
|---|
| 110 |
;; allow for empty string |
|---|
| 111 |
0))) |
|---|
| 112 |
(or (and (< code char-code-limit) |
|---|
| 113 |
(code-char code)) |
|---|
| 114 |
(signal-reader-error |
|---|
| 115 |
"No character for char-code #x~X" code))) |
|---|
| 116 |
(unless (char= (peek-char*) #\}) |
|---|
| 117 |
(signal-reader-error "Expected } after hex code")) |
|---|
| 118 |
(read-char*))) |
|---|
| 119 |
(t |
|---|
| 120 |
;; \x should be followed by a hexadecimal char |
|---|
| 121 |
;; code, two digits or less; note that it is |
|---|
| 122 |
;; OK if \x is followed by zero digits |
|---|
| 123 |
(make-char-from-code (get-number :radix 16 :max 2))))) |
|---|
| 124 |
((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9) |
|---|
| 125 |
(cond ((and (eq regex-mode t) |
|---|
| 126 |
(char/= chr #\0)) |
|---|
| 127 |
;; leave as is if we're in regex mode (and not |
|---|
| 128 |
;; within in a character class) |
|---|
| 129 |
(concatenate 'string "\\" (string chr))) |
|---|
| 130 |
((or (char= chr #\8) |
|---|
| 131 |
(char= chr #\9)) |
|---|
| 132 |
;; outside of regex mode "\8" is "8" (in regex |
|---|
| 133 |
;; mode it is read like "\08"...) |
|---|
| 134 |
chr) |
|---|
| 135 |
(t |
|---|
| 136 |
(unread-char chr *stream*) |
|---|
| 137 |
;; now \x should be followed by an octal char |
|---|
| 138 |
;; code, three digits or less |
|---|
| 139 |
(make-char-from-code (get-number :radix 8 :max 3))))) |
|---|
| 140 |
;; the following five character names are |
|---|
| 141 |
;; 'semi-standard' according to the CLHS but I'm not |
|---|
| 142 |
;; aware of any implementation that doesn't implement |
|---|
| 143 |
;; them |
|---|
| 144 |
((#\t) |
|---|
| 145 |
#\Tab) |
|---|
| 146 |
((#\n) |
|---|
| 147 |
#\Newline) |
|---|
| 148 |
((#\r) |
|---|
| 149 |
#\Return) |
|---|
| 150 |
((#\f) |
|---|
| 151 |
#\Page) |
|---|
| 152 |
((#\b) |
|---|
| 153 |
#\Backspace) |
|---|
| 154 |
((#\a) |
|---|
| 155 |
(code-char 7)) ; ASCII bell |
|---|
| 156 |
((#\e) |
|---|
| 157 |
(code-char 27)) ; ASCII escape |
|---|
| 158 |
(otherwise |
|---|
| 159 |
;; all other characters aren't affected by a backslash |
|---|
| 160 |
chr)))) |
|---|
| 161 |
(cond ((and (characterp result) |
|---|
| 162 |
;; some characters must be 'protected' from CL-PPCRE |
|---|
| 163 |
(or (and (eq regex-mode :in-char-class) |
|---|
| 164 |
(find result "\\^[]-" :test #'char=)) |
|---|
| 165 |
(and (eq regex-mode t) |
|---|
| 166 |
(find result "\\^[]-.$|()*+?" :test #'char=)))) |
|---|
| 167 |
(concatenate 'string "\\" (string result))) |
|---|
| 168 |
(t result))))) |
|---|
| 169 |
|
|---|
| 170 |
(declaim (inline normal-name-char-p) |
|---|
| 171 |
(inline never-name-char-p)) |
|---|
| 172 |
|
|---|
| 173 |
(defun normal-name-char-p (c) |
|---|
| 174 |
(and c (or (alphanumericp c) |
|---|
| 175 |
(member c '(#\_ #\- #\+ #\*))))) |
|---|
| 176 |
|
|---|
| 177 |
(defun never-name-char-p (c) |
|---|
| 178 |
(or (not c) |
|---|
| 179 |
(get-macro-character c) |
|---|
| 180 |
(member c '(#\$ #\@)))) |
|---|
| 181 |
|
|---|
| 182 |
(defvar quell-warnings-form |
|---|
| 183 |
#+sbcl '(declare (optimize (sb-ext:inhibit-warnings 3))) |
|---|
| 184 |
#-sbcl nil |
|---|
| 185 |
"A declaration form to quiet warnings about unbound variables |
|---|
| 186 |
within a lexical environment.") |
|---|
| 187 |
|
|---|
| 188 |
(defun read-longest-name () |
|---|
| 189 |
(coerce |
|---|
| 190 |
(loop until (never-name-char-p (peek-char nil *stream* nil nil t)) |
|---|
| 191 |
collect (read-char*)) |
|---|
| 192 |
'string)) |
|---|
| 193 |
|
|---|
| 194 |
(defun read-optional-delimited () |
|---|
| 195 |
"Read the stuff following an optional delimiter, returning a form |
|---|
| 196 |
that tries to deal correctly with lexical variables." |
|---|
| 197 |
(flet ((try-pos (name i form) |
|---|
| 198 |
(let ((ostr (gensym))) |
|---|
| 199 |
`(handler-case |
|---|
| 200 |
(with-output-to-string (,ostr) |
|---|
| 201 |
(princ ,(read-from-string (subseq name 0 i)) ,ostr) |
|---|
| 202 |
(princ ,(subseq name i) ,ostr) |
|---|
| 203 |
,ostr) |
|---|
| 204 |
(unbound-variable () ,form))))) |
|---|
| 205 |
|
|---|
| 206 |
(loop |
|---|
| 207 |
with name = (read-longest-name) |
|---|
| 208 |
with form = `(error ,(format nil "Interpolation error in ~s~%" name)) |
|---|
| 209 |
with ostr = (gensym) |
|---|
| 210 |
for i = (position-if-not #'normal-name-char-p name) |
|---|
| 211 |
then (position-if-not #'normal-name-char-p name :start (1+ i)) |
|---|
| 212 |
|
|---|
| 213 |
unless i |
|---|
| 214 |
return `(let () ,quell-warnings-form |
|---|
| 215 |
(handler-case |
|---|
| 216 |
(with-output-to-string (,ostr) |
|---|
| 217 |
(princ ,(read-from-string name) ,ostr) |
|---|
| 218 |
,ostr) |
|---|
| 219 |
(unbound-variable () ,form))) |
|---|
| 220 |
|
|---|
| 221 |
if (> i 0) |
|---|
| 222 |
do (setq form (try-pos name i form)) |
|---|
| 223 |
|
|---|
| 224 |
if (< i (length name)) |
|---|
| 225 |
do (setq form (try-pos name (1+ i) form))))) |
|---|
| 226 |
|
|---|
| 227 |
(declaim (inline read-form)) |
|---|
| 228 |
(defun read-form () |
|---|
| 229 |
"Reads and returns one or more Lisp forms from *STREAM* if the |
|---|
| 230 |
character we're looking at is a valid inner delimiter. Otherwise |
|---|
| 231 |
returns NIL." |
|---|
| 232 |
(let* ((start-delimiter (peek-char*)) |
|---|
| 233 |
(end-delimiter (get-end-delimiter start-delimiter *inner-delimiters*))) |
|---|
| 234 |
(cond ((null end-delimiter) |
|---|
| 235 |
(if *optional-delimiters-p* |
|---|
| 236 |
(read-optional-delimited) |
|---|
| 237 |
nil)) |
|---|
| 238 |
(t |
|---|
| 239 |
`(progn |
|---|
| 240 |
,@(progn |
|---|
| 241 |
(read-char*) |
|---|
| 242 |
(let ((*readtable* (copy-readtable*))) |
|---|
| 243 |
;; temporarily change the readtable |
|---|
| 244 |
(set-syntax-from-char end-delimiter #\)) |
|---|
| 245 |
(read-delimited-list end-delimiter *stream* t)))))))) |
|---|
| 246 |
|
|---|
| 247 |
(defun interpol-reader (*stream* char arg) |
|---|
| 248 |
"The actual reader function for the 'sub-character' #\?." |
|---|
| 249 |
(declare (ignore arg char)) |
|---|
| 250 |
(let ((*start-char* (read-char*)) |
|---|
| 251 |
;; REGEX-MODE is true if we're in regular expression mode; it |
|---|
| 252 |
;; can have one of the values :START-OF-CHAR-CLASS, |
|---|
| 253 |
;; :START-OF-NEGATED-CHAR-CLASS, or :IN-CHAR-CLASS if we're |
|---|
| 254 |
;; inside of a character class or just about to start one - |
|---|
| 255 |
;; otherwise the value is T |
|---|
| 256 |
regex-mode |
|---|
| 257 |
;; EXTENDED-MODE is true if we're in extended regular |
|---|
| 258 |
;; expression mode |
|---|
| 259 |
extended-mode) |
|---|
| 260 |
(when (char-equal *start-char* #\r) |
|---|
| 261 |
(setq regex-mode t |
|---|
| 262 |
*start-char* (read-char*))) |
|---|
| 263 |
(when (char-equal *start-char* #\x) |
|---|
| 264 |
(setq extended-mode t |
|---|
| 265 |
*start-char* (read-char*))) |
|---|
| 266 |
(when (and (not regex-mode) |
|---|
| 267 |
(find *start-char* *regex-delimiters* :test #'char=)) |
|---|
| 268 |
(setq regex-mode t)) |
|---|
| 269 |
(unless regex-mode |
|---|
| 270 |
(setq extended-mode nil)) |
|---|
| 271 |
(let ((*term-char* (get-end-delimiter *start-char* |
|---|
| 272 |
*outer-delimiters* |
|---|
| 273 |
:errorp t)) |
|---|
| 274 |
(*pair-level* 0) |
|---|
| 275 |
(*inner-delimiters* (if regex-mode |
|---|
| 276 |
(intersection *inner-delimiters* |
|---|
| 277 |
'((#\{ . #\})) |
|---|
| 278 |
:test #'equal) |
|---|
| 279 |
*inner-delimiters*)) |
|---|
| 280 |
*saw-backslash* |
|---|
| 281 |
*readtable-copy*) |
|---|
| 282 |
(prog1 |
|---|
| 283 |
(inner-reader regex-mode extended-mode nil nil) |
|---|
| 284 |
;; consume the closing outer delimiter |
|---|
| 285 |
(read-char*))))) |
|---|
| 286 |
|
|---|
| 287 |
(defun inner-reader (regex-mode extended-mode quote-mode case-mode) |
|---|
| 288 |
"Helper function for INTERPOL-READER which does all the work. May |
|---|
| 289 |
call itself recursively." |
|---|
| 290 |
;; REGEX-MODE and EXTENDED-MODE as described above; QUOTE-MODE is |
|---|
| 291 |
;; true if we're inside a \Q scope; CASE-MODE is true if we're |
|---|
| 292 |
;; inside a \L or \U scope |
|---|
| 293 |
(let* ((string-stream (gensym)) ;; the string stream |
|---|
| 294 |
;; we use for WITH-OUTPUT-TO-STRING |
|---|
| 295 |
;; if this is not a constant string |
|---|
| 296 |
(collector (make-collector)) ;; we collect |
|---|
| 297 |
;; characters into this |
|---|
| 298 |
;; extentable string |
|---|
| 299 |
result ;; a list of all characters, strings, and forms |
|---|
| 300 |
;; so far (in reverse order while withing the loop) |
|---|
| 301 |
handle-next-char) |
|---|
| 302 |
(block main-loop ;; we need this name so we can leave the LOOP below |
|---|
| 303 |
(flet ((compute-result () |
|---|
| 304 |
;; local function used to leave the loop and compute |
|---|
| 305 |
;; the final RESULT |
|---|
| 306 |
(setq result |
|---|
| 307 |
(nreverse |
|---|
| 308 |
(if (plusp (length collector)) |
|---|
| 309 |
;; add COLLECTOR if it's not empty |
|---|
| 310 |
(cons collector result) |
|---|
| 311 |
result))) |
|---|
| 312 |
(return-from main-loop)) |
|---|
| 313 |
(parse-with-case-mode (action-name) |
|---|
| 314 |
;; local function used to read while in a \U or \L scope |
|---|
| 315 |
(let ((string-to-modify |
|---|
| 316 |
;; read until \E, \L, \U, or end of string |
|---|
| 317 |
(inner-reader regex-mode extended-mode regex-mode t))) |
|---|
| 318 |
(if (stringp string-to-modify) |
|---|
| 319 |
;; modify directly if constant string |
|---|
| 320 |
(funcall action-name string-to-modify) |
|---|
| 321 |
;; otherwise create a form to do that at run time |
|---|
| 322 |
`(write-string |
|---|
| 323 |
(,action-name ,string-to-modify) |
|---|
| 324 |
,string-stream))))) |
|---|
| 325 |
(loop |
|---|
| 326 |
(let ((next-char (read-char*))) |
|---|
| 327 |
(when regex-mode |
|---|
| 328 |
;; when in regex mode make sure where we are with |
|---|
| 329 |
;; respect to character classes |
|---|
| 330 |
(setq regex-mode |
|---|
| 331 |
(case next-char |
|---|
| 332 |
((#\[) |
|---|
| 333 |
(ecase regex-mode |
|---|
| 334 |
((:start-of-char-class |
|---|
| 335 |
:start-of-negated-char-class |
|---|
| 336 |
:in-char-class) :in-char-class) |
|---|
| 337 |
((t) :start-of-char-class))) |
|---|
| 338 |
((#\^) |
|---|
| 339 |
(ecase regex-mode |
|---|
| 340 |
((:start-of-char-class) :start-of-negated-char-class) |
|---|
| 341 |
((:start-of-negated-char-class |
|---|
| 342 |
:in-char-class) :in-char-class) |
|---|
| 343 |
((t) t))) |
|---|
| 344 |
((#\]) |
|---|
| 345 |
(ecase regex-mode |
|---|
| 346 |
((:start-of-char-class |
|---|
| 347 |
:start-of-negated-char-class) :in-char-class) |
|---|
| 348 |
((:in-char-class t) t))) |
|---|
| 349 |
(otherwise |
|---|
| 350 |
(ecase regex-mode |
|---|
| 351 |
((:start-of-char-class |
|---|
| 352 |
:start-of-negated-char-class |
|---|
| 353 |
:in-char-class) :in-char-class) |
|---|
| 354 |
((t) t)))))) |
|---|
| 355 |
(when (and (char= next-char *start-char*) |
|---|
| 356 |
(char/= *start-char* *term-char*)) |
|---|
| 357 |
;; if we see, say, #\( and our closing delimiter is #\) |
|---|
| 358 |
;; we increment *PAIR-LEVEL* so the parentheses can next |
|---|
| 359 |
;; without ending the string |
|---|
| 360 |
(incf *pair-level*)) |
|---|
| 361 |
(let ((interpolation |
|---|
| 362 |
(cond ((and (char= next-char *term-char*) |
|---|
| 363 |
(plusp *pair-level*)) |
|---|
| 364 |
;; although this is the outer closing |
|---|
| 365 |
;; delimiter we don't stop parsing because |
|---|
| 366 |
;; we're insided a nested pair of |
|---|
| 367 |
;; bracketing characters |
|---|
| 368 |
(decf *pair-level*) |
|---|
| 369 |
*term-char*) |
|---|
| 370 |
((char= next-char *term-char*) |
|---|
| 371 |
;; now we really stop - but we don't |
|---|
| 372 |
;; consume the closing delimiter because |
|---|
| 373 |
;; we may need it again to end another |
|---|
| 374 |
;; scope |
|---|
| 375 |
(unread-char next-char *stream*) |
|---|
| 376 |
(compute-result)) |
|---|
| 377 |
(t |
|---|
| 378 |
(case next-char |
|---|
| 379 |
((#\L) |
|---|
| 380 |
(cond ((not *saw-backslash*) |
|---|
| 381 |
;; a normal #\L, no 'pending' |
|---|
| 382 |
;; backslash |
|---|
| 383 |
#\L) |
|---|
| 384 |
(case-mode |
|---|
| 385 |
;; a backslashed #\L which |
|---|
| 386 |
;; we've seen before but we |
|---|
| 387 |
;; still have to close at |
|---|
| 388 |
;; least one \Q/\L/\E scope |
|---|
| 389 |
(unread-char #\L *stream*) |
|---|
| 390 |
(compute-result)) |
|---|
| 391 |
(t |
|---|
| 392 |
;; all scopes are closed, now |
|---|
| 393 |
;; read and downcase 'till \E |
|---|
| 394 |
;; or somesuch |
|---|
| 395 |
(setq *saw-backslash* nil) |
|---|
| 396 |
(parse-with-case-mode 'string-downcase)))) |
|---|
| 397 |
((#\U) |
|---|
| 398 |
;; see comments for #\L above |
|---|
| 399 |
(cond ((not *saw-backslash*) |
|---|
| 400 |
#\U) |
|---|
| 401 |
(case-mode |
|---|
| 402 |
(unread-char #\U *stream*) |
|---|
| 403 |
(compute-result)) |
|---|
| 404 |
(t |
|---|
| 405 |
(setq *saw-backslash* nil) |
|---|
| 406 |
(parse-with-case-mode 'string-upcase)))) |
|---|
| 407 |
((#\Space #\Tab #\Linefeed #\Return #\Page) |
|---|
| 408 |
(cond ((and extended-mode |
|---|
| 409 |
(not (eq regex-mode :in-char-class))) |
|---|
| 410 |
;; in extended mode (if not in |
|---|
| 411 |
;; a character class) |
|---|
| 412 |
;; whitespace is removed |
|---|
| 413 |
"") |
|---|
| 414 |
(t next-char))) |
|---|
| 415 |
((#\() |
|---|
| 416 |
(cond ((and (eq regex-mode t) |
|---|
| 417 |
(null quote-mode) |
|---|
| 418 |
(char/= *term-char* #\?) |
|---|
| 419 |
(eql (peek-char*) #\?)) |
|---|
| 420 |
;; this could start an |
|---|
| 421 |
;; embedded comment in regex |
|---|
| 422 |
;; mode (and we're /not/ |
|---|
| 423 |
;; inside of a \Q scope or a |
|---|
| 424 |
;; character class) |
|---|
| 425 |
(read-char*) |
|---|
| 426 |
(cond ((and (char/= *term-char* #\#) |
|---|
| 427 |
(eql (peek-char*) #\#)) |
|---|
| 428 |
;; yes, it's a |
|---|
| 429 |
;; comment, so consume |
|---|
| 430 |
;; characters 'till #\) |
|---|
| 431 |
(read-while |
|---|
| 432 |
(lambda (char) |
|---|
| 433 |
(and (char/= char #\)) |
|---|
| 434 |
(char/= char *term-char*)))) |
|---|
| 435 |
(cond ((char= (read-char*) *term-char*) |
|---|
| 436 |
(signal-reader-error |
|---|
| 437 |
"Incomplete regex comment starting with '(#'")) |
|---|
| 438 |
((not (digit-char-p (peek-char*) 16)) |
|---|
| 439 |
"") |
|---|
| 440 |
;; special case |
|---|
| 441 |
;; if next |
|---|
| 442 |
;; character |
|---|
| 443 |
;; could |
|---|
| 444 |
;; potentially |
|---|
| 445 |
;; continue an |
|---|
| 446 |
;; octal or |
|---|
| 447 |
;; hexadecimal |
|---|
| 448 |
;; representation |
|---|
| 449 |
(t "(?:)"))) |
|---|
| 450 |
;; no, wasn't a comment |
|---|
| 451 |
(t "(?"))) |
|---|
| 452 |
(t #\())) |
|---|
| 453 |
((#\#) |
|---|
| 454 |
(cond ((and (eq regex-mode t) |
|---|
| 455 |
extended-mode |
|---|
| 456 |
(null quote-mode)) |
|---|
| 457 |
;; we're in extended regex |
|---|
| 458 |
;; mode and not inside of a \Q |
|---|
| 459 |
;; scope or a character class, |
|---|
| 460 |
;; so this is a comment and we |
|---|
| 461 |
;; consume it 'till #\Newline |
|---|
| 462 |
;; or *TERM-CHAR* |
|---|
| 463 |
(read-while |
|---|
| 464 |
(lambda (char) |
|---|
| 465 |
(and (char/= char #\Newline) |
|---|
| 466 |
(char/= char *term-char*)))) |
|---|
| 467 |
(when (char= (peek-char*) #\Newline) |
|---|
| 468 |
(read-char*)) |
|---|
| 469 |
(cond ((not (digit-char-p (peek-char*) |
|---|
| 470 |
16)) |
|---|
| 471 |
"") |
|---|
| 472 |
;; special case, see above |
|---|
| 473 |
(t "(?:)"))) |
|---|
| 474 |
(t #\#))) |
|---|
| 475 |
((#\\) |
|---|
| 476 |
(case (peek-char*) |
|---|
| 477 |
((#\Q) |
|---|
| 478 |
;; \Q - start a new quote scope |
|---|
| 479 |
(read-char*) |
|---|
| 480 |
(let ((string-to-quote |
|---|
| 481 |
(inner-reader regex-mode |
|---|
| 482 |
extended-mode |
|---|
| 483 |
t case-mode))) |
|---|
| 484 |
(if (stringp string-to-quote) |
|---|
| 485 |
;; if we got a constant string |
|---|
| 486 |
;; we modify it directly |
|---|
| 487 |
(quote-meta-chars string-to-quote) |
|---|
| 488 |
;; otherwise we expand into code |
|---|
| 489 |
`(write-string |
|---|
| 490 |
(quote-meta-chars ,string-to-quote) |
|---|
| 491 |
,string-stream)))) |
|---|
| 492 |
((#\L) |
|---|
| 493 |
;; \L - start a new case-modifying |
|---|
| 494 |
;; scope |
|---|
| 495 |
(cond (case-mode |
|---|
| 496 |
;; if we're already in |
|---|
| 497 |
;; this mode we have to |
|---|
| 498 |
;; end all previous scopes |
|---|
| 499 |
;; first - we set |
|---|
| 500 |
;; *SAW-BACKSLASH* to T so |
|---|
| 501 |
;; the #\L is read until |
|---|
| 502 |
;; all scopes are finished |
|---|
| 503 |
(setq *saw-backslash* t) |
|---|
| 504 |
(compute-result)) |
|---|
| 505 |
(t |
|---|
| 506 |
;; all scopes are closed, now |
|---|
| 507 |
;; read and downcase 'till \E |
|---|
| 508 |
;; or somesuch |
|---|
| 509 |
(setq *saw-backslash* nil) |
|---|
| 510 |
(read-char*) |
|---|
| 511 |
(parse-with-case-mode 'string-downcase)))) |
|---|
| 512 |
((#\U) |
|---|
| 513 |
;; see comments for #\L above |
|---|
| 514 |
(cond (case-mode |
|---|
| 515 |
(setq *saw-backslash* t) |
|---|
| 516 |
(compute-result)) |
|---|
| 517 |
(t |
|---|
| 518 |
(setq *saw-backslash* nil) |
|---|
| 519 |
(read-char*) |
|---|
| 520 |
(parse-with-case-mode 'string-upcase)))) |
|---|
| 521 |
((#\E) |
|---|
| 522 |
;; \E - ends exactly one scope |
|---|
| 523 |
(read-char*) |
|---|
| 524 |
(if (or quote-mode case-mode) |
|---|
| 525 |
(compute-result) |
|---|
| 526 |
"")) |
|---|
| 527 |
((#\l) |
|---|
| 528 |
;; \l - downcase next character |
|---|
| 529 |
(read-char*) |
|---|
| 530 |
;; remember that we have to do this |
|---|
| 531 |
(setq handle-next-char :downcase) |
|---|
| 532 |
nil) |
|---|
| 533 |
((#\u) |
|---|
| 534 |
;; \u - upcase next character |
|---|
| 535 |
(read-char*) |
|---|
| 536 |
;; remember that we have to do this |
|---|
| 537 |
(setq handle-next-char :upcase) |
|---|
| 538 |
nil) |
|---|
| 539 |
(otherwise |
|---|
| 540 |
;; otherwise this is a |
|---|
| 541 |
;; backslash-escaped character |
|---|
| 542 |
(unescape-char regex-mode)))) |
|---|
| 543 |
((#\$) |
|---|
| 544 |
;; #\$ - might be an interpolation |
|---|
| 545 |
(let ((form (read-form))) |
|---|
| 546 |
(cond ((null form) |
|---|
| 547 |
;; no, just dollar sign |
|---|
| 548 |
#\$) |
|---|
| 549 |
(handle-next-char |
|---|
| 550 |
;; yes, and we have to |
|---|
| 551 |
;; modify the first |
|---|
| 552 |
;; character |
|---|
| 553 |
(prog1 |
|---|
| 554 |
(let ((string (gensym))) |
|---|
| 555 |
`(let ((,string (format nil "~A" |
|---|
| 556 |
,form))) |
|---|
| 557 |
(when (plusp (length ,string)) |
|---|
| 558 |
(setf (char ,string 0) |
|---|
| 559 |
(,(if (eq handle-next-char |
|---|
| 560 |
:downcase) |
|---|
| 561 |
'char-downcase |
|---|
| 562 |
'char-upcase) |
|---|
| 563 |
(char ,string 0)))) |
|---|
| 564 |
(write-string ,string ,string-stream))) |
|---|
| 565 |
(setq handle-next-char nil))) |
|---|
| 566 |
(t |
|---|
| 567 |
;; no modification, just |
|---|
| 568 |
;; insert a form to PRINC |
|---|
| 569 |
;; this interpolation |
|---|
| 570 |
`(princ ,form ,string-stream))))) |
|---|
| 571 |
((#\@) |
|---|
| 572 |
;; #\Q - might be an interpolation |
|---|
| 573 |
(let ((form (read-form)) |
|---|
| 574 |
(element (gensym)) |
|---|
| 575 |
(first (gensym))) |
|---|
| 576 |
(cond ((null form) |
|---|
| 577 |
;; no, just at-sign |
|---|
| 578 |
#\@) |
|---|
| 579 |
(handle-next-char |
|---|
| 580 |
;; yes, and we have to |
|---|
| 581 |
;; modify the first |
|---|
| 582 |
;; character |
|---|
| 583 |
(prog1 |
|---|
| 584 |
(let ((string (gensym))) |
|---|
| 585 |
`(loop for ,first = t then nil |
|---|
| 586 |
for ,element in ,form |
|---|
| 587 |
unless ,first do |
|---|
| 588 |
(princ *list-delimiter* |
|---|
| 589 |
,string-stream) |
|---|
| 590 |
if ,first do |
|---|
| 591 |
(let ((,string |
|---|
| 592 |
(format nil "~A" |
|---|
| 593 |
,element))) |
|---|
| 594 |
(when (plusp (length ,string)) |
|---|
| 595 |
(setf (char ,string 0) |
|---|
| 596 |
(,(if (eq handle-next-char |
|---|
| 597 |
:downcase) |
|---|
| 598 |
'char-downcase |
|---|
| 599 |
'char-upcase) |
|---|
| 600 |
(char ,string 0)))) |
|---|
| 601 |
(write-string ,string ,string-stream)) |
|---|
| 602 |
else do |
|---|
| 603 |
(princ ,element ,string-stream))) |
|---|
| 604 |
(setq handle-next-char nil))) |
|---|
| 605 |
(t |
|---|
| 606 |
;; no modification, just |
|---|
| 607 |
;; insert a form to PRINC |
|---|
| 608 |
;; this interpolated list |
|---|
| 609 |
;; (including the list |
|---|
| 610 |
;; delimiters inbetween) |
|---|
| 611 |
`(loop for ,first = t then nil |
|---|
| 612 |
for ,element in ,form |
|---|
| 613 |
unless ,first do (princ *list-delimiter* |
|---|
| 614 |
,string-stream) |
|---|
| 615 |
do (princ ,element ,string-stream)))))) |
|---|
| 616 |
;; just a 'normal' character |
|---|
| 617 |
(otherwise next-char)))))) |
|---|
| 618 |
(when interpolation |
|---|
| 619 |
;; INTERPOLATION is NIL if we just saw #\l or #\u |
|---|
| 620 |
(when (and handle-next-char |
|---|
| 621 |
(consp interpolation) |
|---|
| 622 |
(eq (first interpolation) |
|---|
| 623 |
'write-string)) |
|---|
| 624 |
;; if we have to upcase or downcase the following |
|---|
| 625 |
;; character and we just collected a form (from a |
|---|
| 626 |
;; \Q/\L/\U scope) we have to insert code for the |
|---|
| 627 |
;; modification |
|---|
| 628 |
(setf (second interpolation) |
|---|
| 629 |
(let ((string (gensym))) |
|---|
| 630 |
`(let ((,string ,(second interpolation))) |
|---|
| 631 |
(when (plusp (length ,string)) |
|---|
| 632 |
(setf (char ,string 0) |
|---|
| 633 |
(,(if (eq handle-next-char :downcase) |
|---|
| 634 |
'char-downcase |
|---|
| 635 |
'char-upcase) |
|---|
| 636 |
(char ,string 0)))) |
|---|
| 637 |
,string))) |
|---|
| 638 |
(setq handle-next-char nil)) |
|---|
| 639 |
(cond ((characterp interpolation) |
|---|
| 640 |
;; add one character to COLLECTOR and handle |
|---|
| 641 |
;; it according to HANDLE-NEXT-CHAR |
|---|
| 642 |
(vector-push-extend (case handle-next-char |
|---|
| 643 |
((:downcase) |
|---|
| 644 |
(setq handle-next-char nil) |
|---|
| 645 |
(char-downcase interpolation)) |
|---|
| 646 |
((:upcase) |
|---|
| 647 |
(setq handle-next-char nil) |
|---|
| 648 |
(char-upcase interpolation)) |
|---|
| 649 |
(otherwise |
|---|
| 650 |
interpolation)) |
|---|
| 651 |
collector)) |
|---|
| 652 |
((stringp interpolation) |
|---|
| 653 |
;; add a string to COLLECTOR and handle its |
|---|
| 654 |
;; first character according to |
|---|
| 655 |
;; HANDLE-NEXT-CHAR |
|---|
| 656 |
(loop for char across interpolation |
|---|
| 657 |
do (vector-push-extend (case handle-next-char |
|---|
| 658 |
((:downcase) |
|---|
| 659 |
(setq handle-next-char nil) |
|---|
| 660 |
(char-downcase char)) |
|---|
| 661 |
((:upcase) |
|---|
| 662 |
(setq handle-next-char nil) |
|---|
| 663 |
(char-upcase char)) |
|---|
| 664 |
(otherwise |
|---|
| 665 |
char)) |
|---|
| 666 |
collector))) |
|---|
| 667 |
((plusp (length collector)) |
|---|
| 668 |
;; add code (to be executed at runtime) but |
|---|
| 669 |
;; make sure to empty COLLECTOR first |
|---|
| 670 |
(push collector result) |
|---|
| 671 |
(push interpolation result) |
|---|
| 672 |
;; reset collector |
|---|
| 673 |
(setf collector (make-collector))) |
|---|
| 674 |
(t |
|---|
| 675 |
;; same but COLLECTOR is empty |
|---|
| 676 |
(push interpolation result))))))))) |
|---|
| 677 |
(if (every #'stringp result) |
|---|
| 678 |
;; if all elements of RESULT are strings we can return a |
|---|
| 679 |
;; constant string |
|---|
| 680 |
(string-list-to-string result) |
|---|
| 681 |
;; otherwise we have to wrap the PRINCs emitted above into a |
|---|
| 682 |
;; WITH-OUTPUT-TO-STRING form |
|---|
| 683 |
`(with-output-to-string (,string-stream) |
|---|
| 684 |
,@(loop for interpolation in result |
|---|
| 685 |
if (stringp interpolation) |
|---|
| 686 |
collect `(write-string ,interpolation ,string-stream) |
|---|
| 687 |
else |
|---|
| 688 |
collect interpolation))))) |
|---|
| 689 |
|
|---|
| 690 |
(defun %enable-interpol-syntax () |
|---|
| 691 |
"Internal function used to enable reader syntax and store current |
|---|
| 692 |
readtable on stack." |
|---|
| 693 |
(push *readtable* |
|---|
| 694 |
*previous-readtables*) |
|---|
| 695 |
(setq *readtable* (copy-readtable)) |
|---|
| 696 |
(set-dispatch-macro-character #\# #\? #'interpol-reader) |
|---|
| 697 |
(values)) |
|---|
| 698 |
|
|---|
| 699 |
(defun %disable-interpol-syntax () |
|---|
| 700 |
"Internal function used to restore previous readtable." |
|---|
| 701 |
(if *previous-readtables* |
|---|
| 702 |
(setq *readtable* (pop *previous-readtables*)) |
|---|
| 703 |
(setq *readtable* (copy-readtable nil))) |
|---|
| 704 |
(values)) |
|---|
| 705 |
|
|---|
| 706 |
(defmacro enable-interpol-syntax () |
|---|
| 707 |
"Enable CL-INTERPOL reader syntax." |
|---|
| 708 |
`(eval-when (:compile-toplevel :load-toplevel :execute) |
|---|
| 709 |
(%enable-interpol-syntax))) |
|---|
| 710 |
|
|---|
| 711 |
(defmacro disable-interpol-syntax () |
|---|
| 712 |
"Restore readtable which was active before last call to |
|---|
| 713 |
ENABLE-INTERPOL-SYNTAX. If there was no such call, the standard |
|---|
| 714 |
readtable is used." |
|---|
| 715 |
`(eval-when (:compile-toplevel :load-toplevel :execute) |
|---|
| 716 |
(%disable-interpol-syntax))) |
|---|