root/trunk/thirdparty/cl-ppcre/charmap.lisp

Revision 3581, 6.8 kB (checked in by edi, 6 months ago)

Update to current dev version

Line 
1 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
2 ;;; $Header: /usr/local/cvsrep/cl-ppcre/charmap.lisp,v 1.18 2008/07/22 23:54:59 edi Exp $
3
4 ;;; An optimized representation of sets of characters.
5
6 ;;; Copyright (c) 2008, Dr. Edmund Weitz. All rights reserved.
7
8 ;;; Redistribution and use in source and binary forms, with or without
9 ;;; modification, are permitted provided that the following conditions
10 ;;; are met:
11
12 ;;;   * Redistributions of source code must retain the above copyright
13 ;;;     notice, this list of conditions and the following disclaimer.
14
15 ;;;   * Redistributions in binary form must reproduce the above
16 ;;;     copyright notice, this list of conditions and the following
17 ;;;     disclaimer in the documentation and/or other materials
18 ;;;     provided with the distribution.
19
20 ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
21 ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22 ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ;;; ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
24 ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
26 ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
28 ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
32 (in-package :cl-ppcre)
33
34 (defstruct (charmap  (:constructor make-charmap%))
35   ;; a bit vector mapping char codes to "booleans" (1 for set members,
36   ;; 0 for others)
37   (vector #*0 :type simple-bit-vector)
38   ;; the smallest character code of all characters in the set
39   (start 0 :type fixnum)
40   ;; the upper (exclusive) bound of all character codes in the set
41   (end 0 :type fixnum)
42   ;; the number of characters in the set, or NIL if this is unknown
43   (count nil :type (or fixnum null))
44   ;; whether the charmap actually represents the complement of the set 
45   (complementp nil :type boolean))
46
47 ;; seems to be necessary for some Lisps like ClozureCL
48 (defmethod make-load-form ((map charmap) &optional environment)
49   (make-load-form-saving-slots map :environment environment))
50
51 (declaim (inline in-charmap-p))
52 (defun in-charmap-p (char charmap)
53   "Tests whether the character CHAR belongs to the set represented by CHARMAP."
54   (declare #.*standard-optimize-settings*)
55   (declare (character char) (charmap charmap))
56   (let* ((char-code (char-code char))
57          (char-in-vector-p
58           (let ((charmap-start (charmap-start charmap)))
59             (declare (fixnum charmap-start))
60             (and (<= charmap-start char-code)
61                  (< char-code (the fixnum (charmap-end charmap)))
62                  (= 1 (sbit (the simple-bit-vector (charmap-vector charmap))
63                             (- char-code charmap-start)))))))
64     (cond ((charmap-complementp charmap) (not char-in-vector-p))
65           (t char-in-vector-p))))
66
67 (defun charmap-contents (charmap)
68   "Returns a list of all characters belonging to a character map.
69 Only works for non-complement charmaps."
70   (declare #.*standard-optimize-settings*)
71   (declare (charmap charmap))
72   (and (not (charmap-complementp charmap))
73        (loop for code of-type fixnum from (charmap-start charmap) to (charmap-end charmap)
74              for i across (the simple-bit-vector (charmap-vector charmap))
75              when (= i 1)
76              collect (code-char code))))
77
78 (defun make-charmap (start end test-function &optional complementp)
79   "Creates and returns a charmap representing all characters with
80 character codes in the interval [start end) that satisfy
81 TEST-FUNCTION.  The COMPLEMENTP slot of the charmap is set to the
82 value of the optional argument, but this argument doesn't have an
83 effect on how TEST-FUNCTION is used."
84   (declare #.*standard-optimize-settings*)
85   (declare (fixnum start end))
86   (let ((vector (make-array (- end start) :element-type 'bit))
87         (count 0))
88     (declare (fixnum count))
89     (loop for code from start below end
90           for char = (code-char code)
91           for index from 0
92           when char do
93           (incf count)
94           (setf (sbit vector index) (if (funcall test-function char) 1 0)))
95     (make-charmap% :vector vector
96                    :start start
97                    :end end
98                    ;; we don't know for sure if COMPLEMENTP is true as
99                    ;; there isn't a necessary a character for each
100                    ;; integer below *REGEX-CHAR-CODE-LIMIT*
101                    :count (and (not complementp) count)
102                    ;; make sure it's boolean
103                    :complementp (not (not complementp)))))
104
105 (defun create-charmap-from-test-function (test-function start end)
106   "Creates and returns a charmap representing all characters with
107 character codes between START and END which satisfy TEST-FUNCTION.
108 Tries to find the smallest interval which is necessary to represent
109 the character set and uses the complement representation if that
110 helps."
111   (declare #.*standard-optimize-settings*)
112   (let (start-in end-in start-out end-out)
113     ;; determine the smallest intervals containing the set and its
114     ;; complement, [start-in, end-in) and [start-out, end-out) - first
115     ;; the lower bound
116     (loop for code from start below end
117           for char = (code-char code)
118           until (and start-in start-out)
119           when (and char
120                     (not start-in)
121                     (funcall test-function char))
122           do (setq start-in code)
123           when (and char
124                     (not start-out)
125                     (not (funcall test-function char)))
126           do (setq start-out code))
127     (unless start-in
128       ;; no character satisfied the test, so return a "pseudo" charmap
129       ;; where IN-CHARMAP-P is always false
130       (return-from create-charmap-from-test-function
131         (make-charmap% :count 0)))
132     (unless start-out
133       ;; no character failed the test, so return a "pseudo" charmap
134       ;; where IN-CHARMAP-P is always true
135       (return-from create-charmap-from-test-function
136         (make-charmap% :complementp t)))
137     ;; now determine upper bound
138     (loop for code from (1- end) downto start
139           for char = (code-char code)
140           until (and end-in end-out)
141           when (and char
142                     (not end-in)
143                     (funcall test-function char))
144           do (setq end-in (1+ code))
145           when (and char
146                     (not end-out)
147                     (not (funcall test-function char)))
148           do (setq end-out (1+ code)))
149     ;; use the smaller interval
150     (cond ((<= (- end-in start-in) (- end-out start-out))
151            (make-charmap start-in end-in test-function))
152           (t (make-charmap start-out end-out (complement* test-function) t)))))
Note: See TracBrowser for help on using the browser.