| 1 |
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*- |
|---|
| 2 |
;;; $Header: /usr/local/cvsrep/cl-ppcre/chartest.lisp,v 1.3 2008/07/23 00:47:58 edi Exp $ |
|---|
| 3 |
|
|---|
| 4 |
;;; Copyright (c) 2008, Dr. Edmund Weitz. All rights reserved. |
|---|
| 5 |
|
|---|
| 6 |
;;; Redistribution and use in source and binary forms, with or without |
|---|
| 7 |
;;; modification, are permitted provided that the following conditions |
|---|
| 8 |
;;; are met: |
|---|
| 9 |
|
|---|
| 10 |
;;; * Redistributions of source code must retain the above copyright |
|---|
| 11 |
;;; notice, this list of conditions and the following disclaimer. |
|---|
| 12 |
|
|---|
| 13 |
;;; * Redistributions in binary form must reproduce the above |
|---|
| 14 |
;;; copyright notice, this list of conditions and the following |
|---|
| 15 |
;;; disclaimer in the documentation and/or other materials |
|---|
| 16 |
;;; provided with the distribution. |
|---|
| 17 |
|
|---|
| 18 |
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED |
|---|
| 19 |
;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
|---|
| 20 |
;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|---|
| 21 |
;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY |
|---|
| 22 |
;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
|---|
| 23 |
;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE |
|---|
| 24 |
;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
|---|
| 25 |
;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, |
|---|
| 26 |
;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
|---|
| 27 |
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
|---|
| 28 |
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|---|
| 29 |
|
|---|
| 30 |
(in-package :cl-ppcre) |
|---|
| 31 |
|
|---|
| 32 |
(defun create-hash-table-from-test-function (test-function start end) |
|---|
| 33 |
"Creates and returns a hash table representing all characters with |
|---|
| 34 |
character codes between START and END which satisfy TEST-FUNCTION." |
|---|
| 35 |
(declare #.*standard-optimize-settings*) |
|---|
| 36 |
(loop with hash-table = (make-hash-table) |
|---|
| 37 |
for code from start below end |
|---|
| 38 |
for char = (code-char code) |
|---|
| 39 |
when (and char (funcall test-function char)) |
|---|
| 40 |
do (setf (gethash char hash-table) t) |
|---|
| 41 |
finally (return hash-table))) |
|---|
| 42 |
|
|---|
| 43 |
(defun create-optimized-test-function (test-function &key |
|---|
| 44 |
(start 0) |
|---|
| 45 |
(end *regex-char-code-limit*) |
|---|
| 46 |
(kind *optimize-char-classes*)) |
|---|
| 47 |
"Given a unary test function which is applicable to characters |
|---|
| 48 |
returns a function which yields the same boolean results for all |
|---|
| 49 |
characters with character codes from START to \(excluding) END. If |
|---|
| 50 |
KIND is NIL, TEST-FUNCTION will simply be returned. Otherwise, KIND |
|---|
| 51 |
should be one of: |
|---|
| 52 |
|
|---|
| 53 |
* :HASH-TABLE - builds a hash table representing all characters which |
|---|
| 54 |
satisfy the test and returns a closure which checks if |
|---|
| 55 |
a character is in that hash table |
|---|
| 56 |
|
|---|
| 57 |
* :CHARSET - instead of a hash table uses a \"charset\" which is a |
|---|
| 58 |
data structure using non-linear hashing and optimized to |
|---|
| 59 |
represent \(sparse) sets of characters in a fast and |
|---|
| 60 |
space-efficient way \(contributed by Nikodemus Siivola) |
|---|
| 61 |
|
|---|
| 62 |
* :CHARMAP - instead of a hash table uses a bit vector to represent |
|---|
| 63 |
the set of characters |
|---|
| 64 |
|
|---|
| 65 |
You can also use :HASH-TABLE* or :CHARSET* which are like :HASH-TABLE |
|---|
| 66 |
and :CHARSET but use the complement of the set if the set contains |
|---|
| 67 |
more than half of all characters between START and END. This saves |
|---|
| 68 |
space but needs an additional pass across all characters to create the |
|---|
| 69 |
data structure. There is no corresponding :CHARMAP* kind as the bit |
|---|
| 70 |
vectors are already created to cover the smallest possible interval |
|---|
| 71 |
which contains either the set or its complement." |
|---|
| 72 |
(declare #.*standard-optimize-settings*) |
|---|
| 73 |
(ecase kind |
|---|
| 74 |
((nil) test-function) |
|---|
| 75 |
(:charmap |
|---|
| 76 |
(let ((charmap (create-charmap-from-test-function test-function start end))) |
|---|
| 77 |
(lambda (char) |
|---|
| 78 |
(in-charmap-p char charmap)))) |
|---|
| 79 |
((:charset :charset*) |
|---|
| 80 |
(let ((charset (create-charset-from-test-function test-function start end))) |
|---|
| 81 |
(cond ((or (eq kind :charset) |
|---|
| 82 |
(<= (charset-count charset) (ceiling (- end start) 2))) |
|---|
| 83 |
(lambda (char) |
|---|
| 84 |
(in-charset-p char charset))) |
|---|
| 85 |
(t (setq charset (create-charset-from-test-function (complement* test-function) |
|---|
| 86 |
start end)) |
|---|
| 87 |
(lambda (char) |
|---|
| 88 |
(not (in-charset-p char charset))))))) |
|---|
| 89 |
((:hash-table :hash-table*) |
|---|
| 90 |
(let ((hash-table (create-hash-table-from-test-function test-function start end))) |
|---|
| 91 |
(cond ((or (eq kind :charset) |
|---|
| 92 |
(<= (hash-table-count hash-table) (ceiling (- end start) 2))) |
|---|
| 93 |
(lambda (char) |
|---|
| 94 |
(gethash char hash-table))) |
|---|
| 95 |
(t (setq hash-table (create-hash-table-from-test-function (complement* test-function) |
|---|
| 96 |
start end)) |
|---|
| 97 |
(lambda (char) |
|---|
| 98 |
(not (gethash char hash-table))))))))) |
|---|