root/trunk/thirdparty/cl-ppcre/chartest.lisp

Revision 3581, 4.9 kB (checked in by edi, 6 months ago)

Update to current dev version

Line 
1 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
2 ;;; $Header: /usr/local/cvsrep/cl-ppcre/chartest.lisp,v 1.3 2008/07/23 00:47:58 edi Exp $
3
4 ;;; Copyright (c) 2008, Dr. Edmund Weitz. All rights reserved.
5
6 ;;; Redistribution and use in source and binary forms, with or without
7 ;;; modification, are permitted provided that the following conditions
8 ;;; are met:
9
10 ;;;   * Redistributions of source code must retain the above copyright
11 ;;;     notice, this list of conditions and the following disclaimer.
12
13 ;;;   * Redistributions in binary form must reproduce the above
14 ;;;     copyright notice, this list of conditions and the following
15 ;;;     disclaimer in the documentation and/or other materials
16 ;;;     provided with the distribution.
17
18 ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
19 ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 ;;; ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
22 ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
24 ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 (in-package :cl-ppcre)
31
32 (defun create-hash-table-from-test-function (test-function start end)
33   "Creates and returns a hash table representing all characters with
34 character codes between START and END which satisfy TEST-FUNCTION."
35   (declare #.*standard-optimize-settings*)
36   (loop with hash-table = (make-hash-table)
37         for code from start below end
38         for char = (code-char code)
39         when (and char (funcall test-function char))
40         do (setf (gethash char hash-table) t)
41         finally (return hash-table)))
42
43 (defun create-optimized-test-function (test-function &key
44                                                      (start 0)
45                                                      (end *regex-char-code-limit*)
46                                                      (kind *optimize-char-classes*))
47   "Given a unary test function which is applicable to characters
48 returns a function which yields the same boolean results for all
49 characters with character codes from START to \(excluding) END.  If
50 KIND is NIL, TEST-FUNCTION will simply be returned.  Otherwise, KIND
51 should be one of:
52
53 * :HASH-TABLE - builds a hash table representing all characters which
54                 satisfy the test and returns a closure which checks if
55                 a character is in that hash table
56
57 * :CHARSET - instead of a hash table uses a \"charset\" which is a
58              data structure using non-linear hashing and optimized to
59              represent \(sparse) sets of characters in a fast and
60              space-efficient way \(contributed by Nikodemus Siivola)
61
62 * :CHARMAP - instead of a hash table uses a bit vector to represent
63              the set of characters
64
65 You can also use :HASH-TABLE* or :CHARSET* which are like :HASH-TABLE
66 and :CHARSET but use the complement of the set if the set contains
67 more than half of all characters between START and END.  This saves
68 space but needs an additional pass across all characters to create the
69 data structure.  There is no corresponding :CHARMAP* kind as the bit
70 vectors are already created to cover the smallest possible interval
71 which contains either the set or its complement."
72   (declare #.*standard-optimize-settings*)
73   (ecase kind
74     ((nil) test-function)
75     (:charmap
76      (let ((charmap (create-charmap-from-test-function test-function start end)))
77        (lambda (char)
78          (in-charmap-p char charmap))))
79     ((:charset :charset*)
80      (let ((charset (create-charset-from-test-function test-function start end)))
81        (cond ((or (eq kind :charset)
82                   (<= (charset-count charset) (ceiling (- end start) 2)))
83               (lambda (char)
84                 (in-charset-p char charset)))
85              (t (setq charset (create-charset-from-test-function (complement* test-function)
86                                                                  start end))
87                 (lambda (char)
88                   (not (in-charset-p char charset)))))))
89     ((:hash-table :hash-table*)
90      (let ((hash-table (create-hash-table-from-test-function test-function start end)))
91        (cond ((or (eq kind :charset)
92                   (<= (hash-table-count hash-table) (ceiling (- end start) 2)))
93               (lambda (char)
94                 (gethash char hash-table)))
95              (t (setq hash-table (create-hash-table-from-test-function (complement* test-function)
96                                                                        start end))
97                 (lambda (char)
98                   (not (gethash char hash-table)))))))))
Note: See TracBrowser for help on using the browser.