root/trunk/thirdparty/chunga/read.lisp

Revision 3186, 13.3 kB (checked in by edi, 8 months ago)

Import current Chunga dev version from laptop

Previous history at http://trac.common-lisp.net/tbnl/browser/branches/chunga

Line 
1 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CHUNGA; Base: 10 -*-
2 ;;; $Header: /usr/local/cvsrep/chunga/read.lisp,v 1.22 2008/05/26 08:18:00 edi Exp $
3
4 ;;; Copyright (c) 2006-2008, Dr. Edmund Weitz.  All rights reserved.
5
6 ;;; Redistribution and use in source and binary forms, with or without
7 ;;; modification, are permitted provided that the following conditions
8 ;;; are met:
9
10 ;;;   * Redistributions of source code must retain the above copyright
11 ;;;     notice, this list of conditions and the following disclaimer.
12
13 ;;;   * Redistributions in binary form must reproduce the above
14 ;;;     copyright notice, this list of conditions and the following
15 ;;;     disclaimer in the documentation and/or other materials
16 ;;;     provided with the distribution.
17
18 ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
19 ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 ;;; ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
22 ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
24 ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 (in-package :chunga)
31
32 (defun signal-unexpected-chars (last-char expected-chars)
33   "Signals an error that LAST-CHAR was read although one of
34 EXPECTED-CHARS was expected.  \(Note that EXPECTED-CHARS,
35 despites its name, can also be a single character instead of a
36 list).  Uses *CURRENT-ERROR-MESSAGE* if it's not NIL, or calls
37 *CURRENT-ERROR-FUNCTION* otherwise."
38   (cond (*current-error-function*
39          (funcall *current-error-function* last-char expected-chars))
40         (*current-error-message*
41          (error "~@[~A~%~]~:[End of file~;Read character ~:*~S~], but expected ~:[a member of ~S~;~S~]."
42                 *current-error-message* last-char (atom expected-chars) expected-chars))))
43
44 (defun charp (char)
45   "Returns true if the Lisp character CHAR is a CHAR according to RFC 2616."
46   (<= 0 (char-code char) 127))
47
48 (defun controlp (char)
49   "Returns true if the Lisp character CHAR is a CTL according to RFC 2616."
50   (or (<= 0 (char-code char) 31)
51       (= (char-code char) 127)))
52
53 (defun separatorp (char)
54   "Returns true if the Lisp character CHAR is a separator
55 according to RFC 2616."
56   (find char #.(format nil " ()<>@,;:\\\"/[]?={}~C" #\Tab)
57         :test #'char=))
58
59 (defun whitespacep (char)
60   "Returns true if the Lisp character CHAR is whitespace
61 according to RFC 2616."
62   (member char '(#\Space #\Tab) :test #'char=))
63
64 (defun token-char-p (char)
65   "Returns true if the Lisp character CHAR is a token constituent
66 according to RFC 2616."
67   (and (charp char)
68        (not (or (controlp char)
69                 (separatorp char)))))
70
71 (defun assert-char (stream expected-char)
72   "Reads the next character from STREAM and checks if it is the
73 character EXPECTED-CHAR.  Signals an error otherwise."
74   (let ((char (read-char* stream)))
75     (unless (and char (char= char expected-char))
76       (signal-unexpected-chars char expected-char))
77     char))
78
79 (defun assert-crlf (stream)
80   "Reads the next two characters from STREAM and checks if these
81 are a carriage return and a linefeed.  Signals an error
82 otherwise."
83   (assert-char stream #\Return)
84   (assert-char stream #\Linefeed))
85
86 (defun read-line* (stream &optional log-stream)
87   "Reads and assembles characters from the binary stream STREAM until
88 a carriage return is read.  Makes sure that the following character is
89 a linefeed.  If *ACCEPT-BOGUS-EOLS* is not NIL, then the function will
90 also accept a lone carriage return or linefeed as an acceptable line
91 break.  Returns the string of characters read excluding the line
92 break.  Returns NIL if input ends before one character was read.
93 Additionally logs this string to LOG-STREAM if it is not NIL."
94   (let ((result
95          (with-output-to-string (line)
96            (loop for char-seen-p = nil then t
97                  for char = (read-char* stream)
98                  for is-cr-p = (and char (char= char #\Return))
99                  until (or (null char)
100                            is-cr-p
101                            (and *accept-bogus-eols*
102                                 (char= char #\Linefeed)))
103                  do (write-char char line)
104                  finally (cond ((and (not char-seen-p)
105                                      (null char))
106                                 (return-from read-line* nil))
107                                ((not *accept-bogus-eols*)
108                                 (assert-char stream #\Linefeed))
109                                (is-cr-p
110                                 (when (eql (peek-char* stream) #\Linefeed)
111                                   (read-char* stream))))))))
112     (when log-stream
113       (write-line result log-stream)
114       (finish-output log-stream))
115     result))
116
117 (defun trim-whitespace (string)
118   "Returns a version of the string STRING where spaces and tab
119 characters are trimmed from the start and the end.  Might return
120 STRING."
121   ;; optimized version to replace STRING-TRIM, suggested by Jason Kantz
122   (declare (optimize
123             speed
124             (safety 0)
125             (space 0)
126             (debug 1)
127             (compilation-speed 0)
128             #+:lispworks (hcl:fixnum-safety 0)))
129   (declare (string string))
130   (let* ((length (length string))
131          (start (loop for i of-type fixnum from 0 below length
132                       while (or (char= #\space (char string i))
133                                 (char= #\tab (char string i)))
134                       finally (return i)))
135          (end (loop for i of-type fixnum downfrom (1- length) to 0
136                     while (or (char= #\space (char string i))
137                               (char= #\tab (char string i)))
138                     finally (return (1+ i)))))
139     (declare (fixnum start end))
140     (cond ((and (zerop start) (= end length)) string)
141           (t (subseq string start end)))))
142
143 (defun read-http-headers (stream &optional log-stream)
144   "Reads HTTP header lines from STREAM \(except for the initial
145 status line which is supposed to be read already) and returns a
146 corresponding alist of names and values where the names are
147 keywords and the values are strings.  Multiple lines with the
148 same name are combined into one value, the individual values
149 separated by commas.  Header lines which are spread across
150 multiple lines are recognized and treated correctly.  Additonally
151 logs the header lines to LOG-STREAM if it is not NIL."
152   (let (headers
153         (*current-error-message* "While reading HTTP headers:"))
154     (labels ((read-header-line ()
155                "Reads one header line, considering continuations."
156                (with-output-to-string (header-line)
157                  (loop
158                   (let ((line (trim-whitespace (read-line* stream log-stream))))
159                     (when (zerop (length line))
160                       (return))
161                     (write-sequence line header-line)
162                     (let ((next (peek-char* stream)))
163                       (unless (whitespacep next)
164                         (return)))
165                     ;; we've seen whitespace starting a continutation,
166                     ;; so we loop
167                     (write-char #\Space header-line)))))
168              (split-header (line)
169                "Splits line at colon and converts it into a cons.
170 Returns NIL if LINE consists solely of whitespace."
171                (unless (zerop (length (trim-whitespace line)))
172                  (let ((colon-pos (or (position #\: line :test #'char=)
173                                       (error "Couldn't find colon in header line ~S." line))))
174                    (cons (as-keyword (subseq line 0 colon-pos))
175                          (trim-whitespace (subseq line (1+ colon-pos)))))))
176              (add-header (pair)
177                "Adds the name/value cons PAIR to HEADERS.  Takes
178 care of multiple headers with the same name."
179                (let* ((name (car pair))
180                       (existing-header (assoc name headers :test #'eq))
181                       (existing-value (cdr existing-header)))
182                  (cond (existing-header
183                         (setf (cdr existing-header)
184                               (format nil "~A~:[,~;~]~A"
185                                       existing-value
186                                       (and *treat-semicolon-as-continuation*
187                                            (eq name :set-cookie)
188                                            (ends-with-p (trim-whitespace existing-value) ";"))
189                                       (cdr pair))))
190                        (t (push pair headers))))))
191       (loop for header-pair = (split-header (read-header-line))
192             while header-pair
193             do (add-header header-pair)))
194     (nreverse headers)))
195
196 (defun skip-whitespace (stream)
197   "Consume characters from STREAM until an END-OF-FILE is
198 encountered or a non-whitespace \(according to RFC 2616)
199 characters is seen.  This character is returned \(or NIL in case
200 of END-OF-FILE)."
201   (loop for char = (peek-char* stream nil)
202         while (and char (whitespacep char))
203         do (read-char* stream)
204         finally (return char)))
205
206 (defun read-token (stream)
207   "Read characters from STREAM while they are token constituents
208 \(according to RFC 2616).  It is assumed that there's a token
209 character at the current position.  The token read is returned as
210 a string.  Doesn't signal an error \(but simply stops reading) if
211 END-OF-FILE is encountered after the first character."
212   (with-output-to-string (out)
213     (loop for first = t then nil
214           for char = (if first
215                        (peek-char* stream)
216                        (or (peek-char* stream nil) (return)))
217           while (token-char-p char)
218           do (write-char (read-char* stream) out))))
219
220 (defun read-quoted-string (stream)
221   "Reads a quoted string \(according to RFC 2616).  It is assumed
222 that the character at the current position is the opening quote
223 character.  Returns the string read without quotes and escape
224 characters."
225   (read-char* stream)
226   (with-output-to-string (out)
227     (loop for char = (read-char* stream)
228           until (char= char #\")
229           do (case char
230                (#\\ (write-char (read-char* stream) out))
231                (#\Return (assert-char stream #\Linefeed)
232                          (let ((char (read-char* stream)))
233                            (unless (whitespacep char)
234                              (signal-unexpected-chars char '(#\Space #\Tab)))))
235                (otherwise (write-char char out))))))
236
237 (defun read-cookie-value (stream &key name separators)
238   "Reads a cookie parameter value from STREAM which is returned as a
239 string.  Simply reads until a comma or a semicolon is seen \(or an
240 element of SEPARATORS)."
241   (when (eql #\, (peek-char* stream nil))
242     (return-from read-cookie-value ""))
243   (trim-whitespace
244    (with-output-to-string (out)
245      ;; special case for the `Expires' parameter - maybe skip the first comma
246      (loop with separators% = (cond (separators)
247                                     ((equalp name "Expires") ";")
248                                     (t ",;"))
249            for char = (peek-char* stream nil)
250            until (or (null char) (find char separators% :test #'char=))
251            when (and (null separators)
252                      (or (char= char #\,)
253                          (digit-char-p char)))
254            do (setq separators% '(#\, #\;))
255            do (write-char (read-char* stream) out)))))
256
257 (defun read-name-value-pair (stream &key (value-required-p t) cookie-syntax)
258   "Reads a typical \(in RFC 2616) name/value or attribute/value
259 combination from STREAM - a token followed by a #\\= character and
260 another token or a quoted string.  Returns a cons of name and value,
261 both as strings.  If VALUE-REQUIRED-P is NIL, the #\\= sign and the
262 value are optional.  If COOKIE-SYNTAX is true, uses READ-COOKIE-VALUE
263 internally."
264   (skip-whitespace stream)
265   (let ((name (if cookie-syntax
266                 (read-cookie-value stream :separators "=,")
267                 (read-token stream))))
268     (skip-whitespace stream)
269     (cons name
270           (when (or value-required-p
271                     (eql (peek-char* stream nil) #\=))
272             (assert-char stream #\=)
273             (skip-whitespace stream)
274             (cond (cookie-syntax (read-cookie-value stream :name name))
275                   ((char= (peek-char* stream) #\") (read-quoted-string stream))
276                   (t (read-token stream)))))))
277
278 (defun read-name-value-pairs (stream &key (value-required-p t) cookie-syntax)
279   "Uses READ-NAME-VALUE-PAIR to read and return an alist of
280 name/value pairs from STREAM.  It is assumed that the pairs are
281 separated by semicolons and that the first char read \(except for
282 whitespace) will be a semicolon.  The parameters are used as in
283 READ-NAME-VALUE-PAIR.  Stops reading in case of END-OF-FILE
284 \(instead of signaling an error)."
285   (loop for char = (skip-whitespace stream)
286         while (and char (char= char #\;))
287         do (read-char* stream)
288         ;; guard against a stray semicolon at the end
289         when (skip-whitespace stream)
290         collect (read-name-value-pair stream
291                                       :value-required-p value-required-p
292                                       :cookie-syntax cookie-syntax)))
Note: See TracBrowser for help on using the browser.