| 1 |
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CHUNGA; Base: 10 -*- |
|---|
| 2 |
;;; $Header: /usr/local/cvsrep/chunga/read.lisp,v 1.22 2008/05/26 08:18:00 edi Exp $ |
|---|
| 3 |
|
|---|
| 4 |
;;; Copyright (c) 2006-2008, Dr. Edmund Weitz. All rights reserved. |
|---|
| 5 |
|
|---|
| 6 |
;;; Redistribution and use in source and binary forms, with or without |
|---|
| 7 |
;;; modification, are permitted provided that the following conditions |
|---|
| 8 |
;;; are met: |
|---|
| 9 |
|
|---|
| 10 |
;;; * Redistributions of source code must retain the above copyright |
|---|
| 11 |
;;; notice, this list of conditions and the following disclaimer. |
|---|
| 12 |
|
|---|
| 13 |
;;; * Redistributions in binary form must reproduce the above |
|---|
| 14 |
;;; copyright notice, this list of conditions and the following |
|---|
| 15 |
;;; disclaimer in the documentation and/or other materials |
|---|
| 16 |
;;; provided with the distribution. |
|---|
| 17 |
|
|---|
| 18 |
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED |
|---|
| 19 |
;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
|---|
| 20 |
;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|---|
| 21 |
;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY |
|---|
| 22 |
;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
|---|
| 23 |
;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE |
|---|
| 24 |
;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
|---|
| 25 |
;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, |
|---|
| 26 |
;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
|---|
| 27 |
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
|---|
| 28 |
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|---|
| 29 |
|
|---|
| 30 |
(in-package :chunga) |
|---|
| 31 |
|
|---|
| 32 |
(defun signal-unexpected-chars (last-char expected-chars) |
|---|
| 33 |
"Signals an error that LAST-CHAR was read although one of |
|---|
| 34 |
EXPECTED-CHARS was expected. \(Note that EXPECTED-CHARS, |
|---|
| 35 |
despites its name, can also be a single character instead of a |
|---|
| 36 |
list). Uses *CURRENT-ERROR-MESSAGE* if it's not NIL, or calls |
|---|
| 37 |
*CURRENT-ERROR-FUNCTION* otherwise." |
|---|
| 38 |
(cond (*current-error-function* |
|---|
| 39 |
(funcall *current-error-function* last-char expected-chars)) |
|---|
| 40 |
(*current-error-message* |
|---|
| 41 |
(error "~@[~A~%~]~:[End of file~;Read character ~:*~S~], but expected ~:[a member of ~S~;~S~]." |
|---|
| 42 |
*current-error-message* last-char (atom expected-chars) expected-chars)))) |
|---|
| 43 |
|
|---|
| 44 |
(defun charp (char) |
|---|
| 45 |
"Returns true if the Lisp character CHAR is a CHAR according to RFC 2616." |
|---|
| 46 |
(<= 0 (char-code char) 127)) |
|---|
| 47 |
|
|---|
| 48 |
(defun controlp (char) |
|---|
| 49 |
"Returns true if the Lisp character CHAR is a CTL according to RFC 2616." |
|---|
| 50 |
(or (<= 0 (char-code char) 31) |
|---|
| 51 |
(= (char-code char) 127))) |
|---|
| 52 |
|
|---|
| 53 |
(defun separatorp (char) |
|---|
| 54 |
"Returns true if the Lisp character CHAR is a separator |
|---|
| 55 |
according to RFC 2616." |
|---|
| 56 |
(find char #.(format nil " ()<>@,;:\\\"/[]?={}~C" #\Tab) |
|---|
| 57 |
:test #'char=)) |
|---|
| 58 |
|
|---|
| 59 |
(defun whitespacep (char) |
|---|
| 60 |
"Returns true if the Lisp character CHAR is whitespace |
|---|
| 61 |
according to RFC 2616." |
|---|
| 62 |
(member char '(#\Space #\Tab) :test #'char=)) |
|---|
| 63 |
|
|---|
| 64 |
(defun token-char-p (char) |
|---|
| 65 |
"Returns true if the Lisp character CHAR is a token constituent |
|---|
| 66 |
according to RFC 2616." |
|---|
| 67 |
(and (charp char) |
|---|
| 68 |
(not (or (controlp char) |
|---|
| 69 |
(separatorp char))))) |
|---|
| 70 |
|
|---|
| 71 |
(defun assert-char (stream expected-char) |
|---|
| 72 |
"Reads the next character from STREAM and checks if it is the |
|---|
| 73 |
character EXPECTED-CHAR. Signals an error otherwise." |
|---|
| 74 |
(let ((char (read-char* stream))) |
|---|
| 75 |
(unless (and char (char= char expected-char)) |
|---|
| 76 |
(signal-unexpected-chars char expected-char)) |
|---|
| 77 |
char)) |
|---|
| 78 |
|
|---|
| 79 |
(defun assert-crlf (stream) |
|---|
| 80 |
"Reads the next two characters from STREAM and checks if these |
|---|
| 81 |
are a carriage return and a linefeed. Signals an error |
|---|
| 82 |
otherwise." |
|---|
| 83 |
(assert-char stream #\Return) |
|---|
| 84 |
(assert-char stream #\Linefeed)) |
|---|
| 85 |
|
|---|
| 86 |
(defun read-line* (stream &optional log-stream) |
|---|
| 87 |
"Reads and assembles characters from the binary stream STREAM until |
|---|
| 88 |
a carriage return is read. Makes sure that the following character is |
|---|
| 89 |
a linefeed. If *ACCEPT-BOGUS-EOLS* is not NIL, then the function will |
|---|
| 90 |
also accept a lone carriage return or linefeed as an acceptable line |
|---|
| 91 |
break. Returns the string of characters read excluding the line |
|---|
| 92 |
break. Returns NIL if input ends before one character was read. |
|---|
| 93 |
Additionally logs this string to LOG-STREAM if it is not NIL." |
|---|
| 94 |
(let ((result |
|---|
| 95 |
(with-output-to-string (line) |
|---|
| 96 |
(loop for char-seen-p = nil then t |
|---|
| 97 |
for char = (read-char* stream) |
|---|
| 98 |
for is-cr-p = (and char (char= char #\Return)) |
|---|
| 99 |
until (or (null char) |
|---|
| 100 |
is-cr-p |
|---|
| 101 |
(and *accept-bogus-eols* |
|---|
| 102 |
(char= char #\Linefeed))) |
|---|
| 103 |
do (write-char char line) |
|---|
| 104 |
finally (cond ((and (not char-seen-p) |
|---|
| 105 |
(null char)) |
|---|
| 106 |
(return-from read-line* nil)) |
|---|
| 107 |
((not *accept-bogus-eols*) |
|---|
| 108 |
(assert-char stream #\Linefeed)) |
|---|
| 109 |
(is-cr-p |
|---|
| 110 |
(when (eql (peek-char* stream) #\Linefeed) |
|---|
| 111 |
(read-char* stream)))))))) |
|---|
| 112 |
(when log-stream |
|---|
| 113 |
(write-line result log-stream) |
|---|
| 114 |
(finish-output log-stream)) |
|---|
| 115 |
result)) |
|---|
| 116 |
|
|---|
| 117 |
(defun trim-whitespace (string) |
|---|
| 118 |
"Returns a version of the string STRING where spaces and tab |
|---|
| 119 |
characters are trimmed from the start and the end. Might return |
|---|
| 120 |
STRING." |
|---|
| 121 |
;; optimized version to replace STRING-TRIM, suggested by Jason Kantz |
|---|
| 122 |
(declare (optimize |
|---|
| 123 |
speed |
|---|
| 124 |
(safety 0) |
|---|
| 125 |
(space 0) |
|---|
| 126 |
(debug 1) |
|---|
| 127 |
(compilation-speed 0) |
|---|
| 128 |
#+:lispworks (hcl:fixnum-safety 0))) |
|---|
| 129 |
(declare (string string)) |
|---|
| 130 |
(let* ((length (length string)) |
|---|
| 131 |
(start (loop for i of-type fixnum from 0 below length |
|---|
| 132 |
while (or (char= #\space (char string i)) |
|---|
| 133 |
(char= #\tab (char string i))) |
|---|
| 134 |
finally (return i))) |
|---|
| 135 |
(end (loop for i of-type fixnum downfrom (1- length) to 0 |
|---|
| 136 |
while (or (char= #\space (char string i)) |
|---|
| 137 |
(char= #\tab (char string i))) |
|---|
| 138 |
finally (return (1+ i))))) |
|---|
| 139 |
(declare (fixnum start end)) |
|---|
| 140 |
(cond ((and (zerop start) (= end length)) string) |
|---|
| 141 |
(t (subseq string start end))))) |
|---|
| 142 |
|
|---|
| 143 |
(defun read-http-headers (stream &optional log-stream) |
|---|
| 144 |
"Reads HTTP header lines from STREAM \(except for the initial |
|---|
| 145 |
status line which is supposed to be read already) and returns a |
|---|
| 146 |
corresponding alist of names and values where the names are |
|---|
| 147 |
keywords and the values are strings. Multiple lines with the |
|---|
| 148 |
same name are combined into one value, the individual values |
|---|
| 149 |
separated by commas. Header lines which are spread across |
|---|
| 150 |
multiple lines are recognized and treated correctly. Additonally |
|---|
| 151 |
logs the header lines to LOG-STREAM if it is not NIL." |
|---|
| 152 |
(let (headers |
|---|
| 153 |
(*current-error-message* "While reading HTTP headers:")) |
|---|
| 154 |
(labels ((read-header-line () |
|---|
| 155 |
"Reads one header line, considering continuations." |
|---|
| 156 |
(with-output-to-string (header-line) |
|---|
| 157 |
(loop |
|---|
| 158 |
(let ((line (trim-whitespace (read-line* stream log-stream)))) |
|---|
| 159 |
(when (zerop (length line)) |
|---|
| 160 |
(return)) |
|---|
| 161 |
(write-sequence line header-line) |
|---|
| 162 |
(let ((next (peek-char* stream))) |
|---|
| 163 |
(unless (whitespacep next) |
|---|
| 164 |
(return))) |
|---|
| 165 |
;; we've seen whitespace starting a continutation, |
|---|
| 166 |
;; so we loop |
|---|
| 167 |
(write-char #\Space header-line))))) |
|---|
| 168 |
(split-header (line) |
|---|
| 169 |
"Splits line at colon and converts it into a cons. |
|---|
| 170 |
Returns NIL if LINE consists solely of whitespace." |
|---|
| 171 |
(unless (zerop (length (trim-whitespace line))) |
|---|
| 172 |
(let ((colon-pos (or (position #\: line :test #'char=) |
|---|
| 173 |
(error "Couldn't find colon in header line ~S." line)))) |
|---|
| 174 |
(cons (as-keyword (subseq line 0 colon-pos)) |
|---|
| 175 |
(trim-whitespace (subseq line (1+ colon-pos))))))) |
|---|
| 176 |
(add-header (pair) |
|---|
| 177 |
"Adds the name/value cons PAIR to HEADERS. Takes |
|---|
| 178 |
care of multiple headers with the same name." |
|---|
| 179 |
(let* ((name (car pair)) |
|---|
| 180 |
(existing-header (assoc name headers :test #'eq)) |
|---|
| 181 |
(existing-value (cdr existing-header))) |
|---|
| 182 |
(cond (existing-header |
|---|
| 183 |
(setf (cdr existing-header) |
|---|
| 184 |
(format nil "~A~:[,~;~]~A" |
|---|
| 185 |
existing-value |
|---|
| 186 |
(and *treat-semicolon-as-continuation* |
|---|
| 187 |
(eq name :set-cookie) |
|---|
| 188 |
(ends-with-p (trim-whitespace existing-value) ";")) |
|---|
| 189 |
(cdr pair)))) |
|---|
| 190 |
(t (push pair headers)))))) |
|---|
| 191 |
(loop for header-pair = (split-header (read-header-line)) |
|---|
| 192 |
while header-pair |
|---|
| 193 |
do (add-header header-pair))) |
|---|
| 194 |
(nreverse headers))) |
|---|
| 195 |
|
|---|
| 196 |
(defun skip-whitespace (stream) |
|---|
| 197 |
"Consume characters from STREAM until an END-OF-FILE is |
|---|
| 198 |
encountered or a non-whitespace \(according to RFC 2616) |
|---|
| 199 |
characters is seen. This character is returned \(or NIL in case |
|---|
| 200 |
of END-OF-FILE)." |
|---|
| 201 |
(loop for char = (peek-char* stream nil) |
|---|
| 202 |
while (and char (whitespacep char)) |
|---|
| 203 |
do (read-char* stream) |
|---|
| 204 |
finally (return char))) |
|---|
| 205 |
|
|---|
| 206 |
(defun read-token (stream) |
|---|
| 207 |
"Read characters from STREAM while they are token constituents |
|---|
| 208 |
\(according to RFC 2616). It is assumed that there's a token |
|---|
| 209 |
character at the current position. The token read is returned as |
|---|
| 210 |
a string. Doesn't signal an error \(but simply stops reading) if |
|---|
| 211 |
END-OF-FILE is encountered after the first character." |
|---|
| 212 |
(with-output-to-string (out) |
|---|
| 213 |
(loop for first = t then nil |
|---|
| 214 |
for char = (if first |
|---|
| 215 |
(peek-char* stream) |
|---|
| 216 |
(or (peek-char* stream nil) (return))) |
|---|
| 217 |
while (token-char-p char) |
|---|
| 218 |
do (write-char (read-char* stream) out)))) |
|---|
| 219 |
|
|---|
| 220 |
(defun read-quoted-string (stream) |
|---|
| 221 |
"Reads a quoted string \(according to RFC 2616). It is assumed |
|---|
| 222 |
that the character at the current position is the opening quote |
|---|
| 223 |
character. Returns the string read without quotes and escape |
|---|
| 224 |
characters." |
|---|
| 225 |
(read-char* stream) |
|---|
| 226 |
(with-output-to-string (out) |
|---|
| 227 |
(loop for char = (read-char* stream) |
|---|
| 228 |
until (char= char #\") |
|---|
| 229 |
do (case char |
|---|
| 230 |
(#\\ (write-char (read-char* stream) out)) |
|---|
| 231 |
(#\Return (assert-char stream #\Linefeed) |
|---|
| 232 |
(let ((char (read-char* stream))) |
|---|
| 233 |
(unless (whitespacep char) |
|---|
| 234 |
(signal-unexpected-chars char '(#\Space #\Tab))))) |
|---|
| 235 |
(otherwise (write-char char out)))))) |
|---|
| 236 |
|
|---|
| 237 |
(defun read-cookie-value (stream &key name separators) |
|---|
| 238 |
"Reads a cookie parameter value from STREAM which is returned as a |
|---|
| 239 |
string. Simply reads until a comma or a semicolon is seen \(or an |
|---|
| 240 |
element of SEPARATORS)." |
|---|
| 241 |
(when (eql #\, (peek-char* stream nil)) |
|---|
| 242 |
(return-from read-cookie-value "")) |
|---|
| 243 |
(trim-whitespace |
|---|
| 244 |
(with-output-to-string (out) |
|---|
| 245 |
;; special case for the `Expires' parameter - maybe skip the first comma |
|---|
| 246 |
(loop with separators% = (cond (separators) |
|---|
| 247 |
((equalp name "Expires") ";") |
|---|
| 248 |
(t ",;")) |
|---|
| 249 |
for char = (peek-char* stream nil) |
|---|
| 250 |
until (or (null char) (find char separators% :test #'char=)) |
|---|
| 251 |
when (and (null separators) |
|---|
| 252 |
(or (char= char #\,) |
|---|
| 253 |
(digit-char-p char))) |
|---|
| 254 |
do (setq separators% '(#\, #\;)) |
|---|
| 255 |
do (write-char (read-char* stream) out))))) |
|---|
| 256 |
|
|---|
| 257 |
(defun read-name-value-pair (stream &key (value-required-p t) cookie-syntax) |
|---|
| 258 |
"Reads a typical \(in RFC 2616) name/value or attribute/value |
|---|
| 259 |
combination from STREAM - a token followed by a #\\= character and |
|---|
| 260 |
another token or a quoted string. Returns a cons of name and value, |
|---|
| 261 |
both as strings. If VALUE-REQUIRED-P is NIL, the #\\= sign and the |
|---|
| 262 |
value are optional. If COOKIE-SYNTAX is true, uses READ-COOKIE-VALUE |
|---|
| 263 |
internally." |
|---|
| 264 |
(skip-whitespace stream) |
|---|
| 265 |
(let ((name (if cookie-syntax |
|---|
| 266 |
(read-cookie-value stream :separators "=,") |
|---|
| 267 |
(read-token stream)))) |
|---|
| 268 |
(skip-whitespace stream) |
|---|
| 269 |
(cons name |
|---|
| 270 |
(when (or value-required-p |
|---|
| 271 |
(eql (peek-char* stream nil) #\=)) |
|---|
| 272 |
(assert-char stream #\=) |
|---|
| 273 |
(skip-whitespace stream) |
|---|
| 274 |
(cond (cookie-syntax (read-cookie-value stream :name name)) |
|---|
| 275 |
((char= (peek-char* stream) #\") (read-quoted-string stream)) |
|---|
| 276 |
(t (read-token stream))))))) |
|---|
| 277 |
|
|---|
| 278 |
(defun read-name-value-pairs (stream &key (value-required-p t) cookie-syntax) |
|---|
| 279 |
"Uses READ-NAME-VALUE-PAIR to read and return an alist of |
|---|
| 280 |
name/value pairs from STREAM. It is assumed that the pairs are |
|---|
| 281 |
separated by semicolons and that the first char read \(except for |
|---|
| 282 |
whitespace) will be a semicolon. The parameters are used as in |
|---|
| 283 |
READ-NAME-VALUE-PAIR. Stops reading in case of END-OF-FILE |
|---|
| 284 |
\(instead of signaling an error)." |
|---|
| 285 |
(loop for char = (skip-whitespace stream) |
|---|
| 286 |
while (and char (char= char #\;)) |
|---|
| 287 |
do (read-char* stream) |
|---|
| 288 |
;; guard against a stray semicolon at the end |
|---|
| 289 |
when (skip-whitespace stream) |
|---|
| 290 |
collect (read-name-value-pair stream |
|---|
| 291 |
:value-required-p value-required-p |
|---|
| 292 |
:cookie-syntax cookie-syntax))) |
|---|