root/trunk/thirdparty/cl-ppcre/regex-class.lisp

Revision 3581, 9.7 kB (checked in by edi, 6 months ago)

Update to current dev version

Line 
1 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
2 ;;; $Header: /usr/local/cvsrep/cl-ppcre/regex-class.lisp,v 1.42 2008/07/22 22:38:05 edi Exp $
3
4 ;;; This file defines the REGEX class.  REGEX objects are used to
5 ;;; represent the (transformed) parse trees internally
6
7 ;;; Copyright (c) 2002-2008, Dr. Edmund Weitz. All rights reserved.
8
9 ;;; Redistribution and use in source and binary forms, with or without
10 ;;; modification, are permitted provided that the following conditions
11 ;;; are met:
12
13 ;;;   * Redistributions of source code must retain the above copyright
14 ;;;     notice, this list of conditions and the following disclaimer.
15
16 ;;;   * Redistributions in binary form must reproduce the above
17 ;;;     copyright notice, this list of conditions and the following
18 ;;;     disclaimer in the documentation and/or other materials
19 ;;;     provided with the distribution.
20
21 ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
22 ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23 ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 ;;; ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
25 ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
27 ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
29 ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
33 (in-package :cl-ppcre)
34
35 (defclass regex ()
36   ()
37   (:documentation "The REGEX base class.  All other classes inherit
38 from this one."))
39
40 (defclass seq (regex)
41   ((elements :initarg :elements
42              :accessor elements
43              :type cons
44              :documentation "A list of REGEX objects."))
45   (:documentation "SEQ objects represents sequences of regexes.
46 \(Like \"ab\" is the sequence of \"a\" and \"b\".)"))
47
48 (defclass alternation (regex)
49   ((choices :initarg :choices
50             :accessor choices
51             :type cons
52             :documentation "A list of REGEX objects"))
53   (:documentation "ALTERNATION objects represent alternations of
54 regexes.  \(Like \"a|b\" ist the alternation of \"a\" or \"b\".)"))
55
56 (defclass lookahead (regex)
57   ((regex :initarg :regex
58           :accessor regex
59           :documentation "The REGEX object we're checking.")
60    (positivep :initarg :positivep
61               :reader positivep
62               :documentation "Whether this assertion is positive."))
63   (:documentation "LOOKAHEAD objects represent look-ahead assertions."))
64
65 (defclass lookbehind (regex)
66   ((regex :initarg :regex
67           :accessor regex
68           :documentation "The REGEX object we're checking.")
69    (positivep :initarg :positivep
70               :reader positivep
71               :documentation "Whether this assertion is positive.")
72    (len :initarg :len
73         :accessor len
74         :type fixnum
75         :documentation "The \(fixed) length of the enclosed regex."))
76   (:documentation "LOOKBEHIND objects represent look-behind assertions."))
77
78 (defclass repetition (regex)
79   ((regex :initarg :regex
80           :accessor regex
81           :documentation "The REGEX that's repeated.")
82    (greedyp :initarg :greedyp
83             :reader greedyp
84             :documentation "Whether the repetition is greedy.")
85    (minimum :initarg :minimum
86             :accessor minimum
87             :type fixnum
88             :documentation "The minimal number of repetitions.")
89    (maximum :initarg :maximum
90             :accessor maximum
91             :documentation "The maximal number of repetitions.
92 Can be NIL for unbounded.")
93    (min-len :initarg :min-len
94             :reader min-len
95             :documentation "The minimal length of the enclosed regex.")
96    (len :initarg :len
97         :reader len
98         :documentation "The length of the enclosed regex.  NIL if
99 unknown.")
100    (min-rest :initform 0
101              :accessor min-rest
102              :type fixnum
103              :documentation "The minimal number of characters which
104 must appear after this repetition.")
105    (contains-register-p :initarg :contains-register-p
106                         :reader contains-register-p
107                         :documentation "Whether the regex contains a
108 register."))
109   (:documentation "REPETITION objects represent repetitions of regexes."))
110
111 (defclass register (regex)
112   ((regex :initarg :regex
113           :accessor regex
114           :documentation "The inner regex.")
115    (num :initarg :num
116         :reader num
117         :type fixnum
118         :documentation "The number of this register, starting from 0.
119 This is the index into *REGS-START* and *REGS-END*.")
120    (name :initarg :name
121          :reader name
122          :documentation "Name of this register or NIL."))
123   (:documentation "REGISTER objects represent register groups."))
124
125 (defclass standalone (regex)
126   ((regex :initarg :regex
127           :accessor regex
128           :documentation "The inner regex."))
129   (:documentation "A standalone regular expression."))
130  
131 (defclass back-reference (regex)
132   ((num :initarg :num
133         :accessor num
134         :type fixnum
135         :documentation "The number of the register this
136 reference refers to.")
137    (name :initarg :name
138          :accessor name
139          :documentation "The name of the register this
140 reference refers to or NIL.")
141    (case-insensitive-p :initarg :case-insensitive-p
142                        :reader case-insensitive-p
143                        :documentation "Whether we check
144 case-insensitively."))
145   (:documentation "BACK-REFERENCE objects represent backreferences."))
146
147 (defclass char-class (regex)
148   ((test-function :initarg :test-function
149                   :reader test-function
150                   :type (or function symbol nil)
151                   :documentation "A unary function \(accepting a
152 character) which stands in for the character class and does the work
153 of checking whether a character belongs to the class."))
154   (:documentation "CHAR-CLASS objects represent character classes."))
155
156 (defclass str (regex)
157   ((str :initarg :str
158         :accessor str
159         :type string
160         :documentation "The actual string.")
161    (len :initform 0
162         :accessor len
163         :type fixnum
164         :documentation "The length of the string.")
165    (case-insensitive-p :initarg :case-insensitive-p
166                        :reader case-insensitive-p
167                        :documentation "If we match case-insensitively.")
168    (offset :initform nil
169            :accessor offset
170            :documentation "Offset from the left of the whole
171 parse tree. The first regex has offset 0. NIL if unknown, i.e. behind
172 a variable-length regex.")
173    (skip :initform nil
174          :initarg :skip
175          :accessor skip
176          :documentation "If we can avoid testing for this
177 string because the SCAN function has done this already.")
178    (start-of-end-string-p :initform nil
179                           :accessor start-of-end-string-p
180                           :documentation "If this is the unique
181 STR which starts END-STRING (a slot of MATCHER)."))
182   (:documentation "STR objects represent string."))
183
184 (defclass anchor (regex)
185   ((startp :initarg :startp
186            :reader startp
187            :documentation "Whether this is a \"start anchor\".")
188    (multi-line-p :initarg :multi-line-p
189                  :initform nil
190                  :reader multi-line-p
191                  :documentation "Whether we're in multi-line mode,
192 i.e. whether each #\\Newline is surrounded by anchors.")
193    (no-newline-p :initarg :no-newline-p
194                  :initform nil
195                  :reader no-newline-p
196                  :documentation "Whether we ignore #\\Newline at the end."))
197   (:documentation "ANCHOR objects represent anchors like \"^\" or \"$\"."))
198
199 (defclass everything (regex)
200   ((single-line-p :initarg :single-line-p
201                   :reader single-line-p
202                   :documentation "Whether we're in single-line mode,
203 i.e. whether we also match #\\Newline."))
204   (:documentation "EVERYTHING objects represent regexes matching
205 \"everything\", i.e. dots."))
206
207 (defclass word-boundary (regex)
208   ((negatedp :initarg :negatedp
209              :reader negatedp
210              :documentation "Whether we mean the opposite,
211 i.e. no word-boundary."))
212   (:documentation "WORD-BOUNDARY objects represent word-boundary assertions."))
213
214 (defclass branch (regex)
215   ((test :initarg :test
216          :accessor test
217          :documentation "The test of this branch, one of
218 LOOKAHEAD, LOOKBEHIND, or a number.")
219    (then-regex :initarg :then-regex
220                :accessor then-regex
221                :documentation "The regex that's to be matched if the
222 test succeeds.")
223    (else-regex :initarg :else-regex
224                :initform (make-instance 'void)
225                :accessor else-regex
226                :documentation "The regex that's to be matched if the
227 test fails."))
228   (:documentation "BRANCH objects represent Perl's conditional regular
229 expressions."))
230    
231 (defclass filter (regex)
232   ((fn :initarg :fn
233        :accessor fn
234        :type (or function symbol)
235        :documentation "The user-defined function.")
236    (len :initarg :len
237         :reader len
238         :documentation "The fixed length of this filter or NIL."))
239   (:documentation "FILTER objects represent arbitrary functions
240 defined by the user."))
241
242 (defclass void (regex)
243   ()
244   (:documentation "VOID objects represent empty regular expressions."))
245
246 (defmethod initialize-instance :after ((str str) &rest init-args)
247   (declare #.*standard-optimize-settings*)
248   (declare (ignore init-args))
249   "Automatically computes the length of a STR after initialization."
250   (let ((str-slot (slot-value str 'str)))
251     (unless (typep str-slot 'simple-string)
252       (setf (slot-value str 'str) (coerce str-slot 'simple-string))))
253   (setf (len str) (length (str str))))
254
Note: See TracBrowser for help on using the browser.