| 1 |
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
|---|
| 2 |
;;;; parse-mime.lisp: Tools for parsing a mime string/stream |
|---|
| 3 |
;;;; Copyright (C) 2004 Robert Marlow <bobstopper@bobturf.org> |
|---|
| 4 |
;;;; |
|---|
| 5 |
;;;; This library is free software; you can redistribute it and/or |
|---|
| 6 |
;;;; modify it under the terms of the GNU Library General Public |
|---|
| 7 |
;;;; License as published by the Free Software Foundation; either |
|---|
| 8 |
;;;; version 2 of the License, or (at your option) any later version. |
|---|
| 9 |
;;;; |
|---|
| 10 |
;;;; This library is distributed in the hope that it will be useful, |
|---|
| 11 |
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
|---|
| 12 |
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|---|
| 13 |
;;;; Library General Public License for more details. |
|---|
| 14 |
;;;; |
|---|
| 15 |
;;;; You should have received a copy of the GNU Library General Public |
|---|
| 16 |
;;;; License along with this library; if not, write to the |
|---|
| 17 |
;;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
|---|
| 18 |
;;;; Boston, MA 02111-1307, USA. |
|---|
| 19 |
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
|---|
| 20 |
|
|---|
| 21 |
|
|---|
| 22 |
|
|---|
| 23 |
(in-package :mime) |
|---|
| 24 |
|
|---|
| 25 |
(defgeneric parse-mime (mime &optional headers) |
|---|
| 26 |
(:documentation |
|---|
| 27 |
"Parse a string or stream containing a MIME message and return a mine |
|---|
| 28 |
object representing it or nil if the message is not MIME compatible")) |
|---|
| 29 |
|
|---|
| 30 |
|
|---|
| 31 |
(defmethod parse-mime ((mime string) &optional headers) |
|---|
| 32 |
(parse-mime (make-string-input-stream mime) headers)) |
|---|
| 33 |
|
|---|
| 34 |
|
|---|
| 35 |
(defmethod parse-mime ((mime stream) &optional headers) |
|---|
| 36 |
(catch 'bad-mime |
|---|
| 37 |
(let* ((headers (or headers (parse-headers mime))) |
|---|
| 38 |
(content-type-header (assoc :content-type headers)) |
|---|
| 39 |
(content-disposition-header (assoc :content-disposition headers)) |
|---|
| 40 |
(content-type-subtype (split "/" (header-value |
|---|
| 41 |
content-type-header))) |
|---|
| 42 |
(content-type (first content-type-subtype)) |
|---|
| 43 |
(content-subtype (second content-type-subtype)) |
|---|
| 44 |
(content-parm (header-parms content-type-header)) |
|---|
| 45 |
(content-disposition (header-value content-disposition-header)) |
|---|
| 46 |
(content-disposition-parm (when content-disposition-header |
|---|
| 47 |
(header-parms |
|---|
| 48 |
content-disposition-header))) |
|---|
| 49 |
(boundary nil) |
|---|
| 50 |
(mime-version (or (header-value (assoc :mime-version headers)) "1.0")) |
|---|
| 51 |
(mime-type (cond |
|---|
| 52 |
((equal content-type "text") 'text-mime) |
|---|
| 53 |
((equal content-type "multipart") 'multipart-mime) |
|---|
| 54 |
(t 'mime)))) |
|---|
| 55 |
|
|---|
| 56 |
(if (equal mime-version "1.0") |
|---|
| 57 |
|
|---|
| 58 |
(let* ((encoding (intern (string-upcase |
|---|
| 59 |
(or (cdr (assoc :content-transfer-encoding |
|---|
| 60 |
headers)) |
|---|
| 61 |
"7BIT")) |
|---|
| 62 |
:keyword)) |
|---|
| 63 |
(mime-obj-gen |
|---|
| 64 |
(list |
|---|
| 65 |
mime-type |
|---|
| 66 |
:type content-type |
|---|
| 67 |
:subtype content-subtype |
|---|
| 68 |
:encoding encoding |
|---|
| 69 |
:content-encoding encoding |
|---|
| 70 |
:description (cdr (assoc :content-description |
|---|
| 71 |
headers)) |
|---|
| 72 |
:id (remove #\< (remove #\> (cdr (assoc :content-id headers)))) |
|---|
| 73 |
:disposition content-disposition |
|---|
| 74 |
:disposition-parameters content-disposition-parm))) |
|---|
| 75 |
|
|---|
| 76 |
(case mime-type |
|---|
| 77 |
((text-mime) |
|---|
| 78 |
(setq mime-obj-gen |
|---|
| 79 |
(append mime-obj-gen |
|---|
| 80 |
(list |
|---|
| 81 |
:charset (cdr (assoc :charset content-parm)) |
|---|
| 82 |
:parameters (delete (assoc :charset content-parm) |
|---|
| 83 |
content-parm))))) |
|---|
| 84 |
((multipart-mime) |
|---|
| 85 |
(setq boundary (second (assoc :boundary content-parm))) |
|---|
| 86 |
(setq mime-obj-gen |
|---|
| 87 |
(append mime-obj-gen |
|---|
| 88 |
(list |
|---|
| 89 |
:boundary boundary |
|---|
| 90 |
:parameters (delete (assoc :boundary content-parm) |
|---|
| 91 |
content-parm) |
|---|
| 92 |
:prologue (get-prologue mime boundary))))) |
|---|
| 93 |
|
|---|
| 94 |
(t (setq mime-obj-gen |
|---|
| 95 |
(append mime-obj-gen (list :parameters content-parm))))) |
|---|
| 96 |
|
|---|
| 97 |
(setq mime-obj-gen |
|---|
| 98 |
(append mime-obj-gen |
|---|
| 99 |
(list :content (parse-body mime |
|---|
| 100 |
(ensure-keyword mime-type) |
|---|
| 101 |
boundary)))) |
|---|
| 102 |
|
|---|
| 103 |
(case mime-type |
|---|
| 104 |
((multipart-mime) |
|---|
| 105 |
(setq mime-obj-gen |
|---|
| 106 |
(append mime-obj-gen |
|---|
| 107 |
(list :epilogue (get-epilogue mime)))))) |
|---|
| 108 |
|
|---|
| 109 |
(apply #'make-instance mime-obj-gen)) |
|---|
| 110 |
|
|---|
| 111 |
;; If we decide this isn't MIME 1.0 compatible, we just return nil. |
|---|
| 112 |
nil)))) |
|---|
| 113 |
|
|---|
| 114 |
(defun parse-headers (stream) |
|---|
| 115 |
"Parses headers from a stream and converts them into keyword/value pairs" |
|---|
| 116 |
(let ((headers nil) |
|---|
| 117 |
(previous-line nil)) |
|---|
| 118 |
|
|---|
| 119 |
(read-lines (line stream) |
|---|
| 120 |
((equal line "") |
|---|
| 121 |
(if previous-line |
|---|
| 122 |
(push (create-header previous-line) headers)) |
|---|
| 123 |
headers) |
|---|
| 124 |
|
|---|
| 125 |
;; Headers beginning with whitespace are continuations |
|---|
| 126 |
;; from the header on the previous line. Headers not |
|---|
| 127 |
;; beginning with complete lines are starts of new headers |
|---|
| 128 |
(unless |
|---|
| 129 |
(register-groups-bind |
|---|
| 130 |
(next-line) |
|---|
| 131 |
("^\\s+(.+)" line) |
|---|
| 132 |
|
|---|
| 133 |
(setq previous-line |
|---|
| 134 |
(format nil "~A ~A" previous-line next-line))) |
|---|
| 135 |
(if previous-line |
|---|
| 136 |
(push (create-header previous-line) headers)) |
|---|
| 137 |
(setq previous-line line))))) |
|---|
| 138 |
|
|---|
| 139 |
|
|---|
| 140 |
(defun header-value (header) |
|---|
| 141 |
"Takes a header cons and returns the value component" |
|---|
| 142 |
(register-groups-bind (value) ("^([^;\\s]*)" (cdr header)) value)) |
|---|
| 143 |
|
|---|
| 144 |
|
|---|
| 145 |
(defun header-parms (header) |
|---|
| 146 |
"Takes a header cons and returns all parameters contained within" |
|---|
| 147 |
(extract-parms |
|---|
| 148 |
(regex-replace-all "\\(.*?\\)" |
|---|
| 149 |
(or (register-groups-bind |
|---|
| 150 |
(params) |
|---|
| 151 |
("^[^;\\s]*(;.*)$" (cdr header)) |
|---|
| 152 |
|
|---|
| 153 |
params) |
|---|
| 154 |
(return-from header-parms nil)) |
|---|
| 155 |
""))) |
|---|
| 156 |
|
|---|
| 157 |
|
|---|
| 158 |
(defun header-comments (header) |
|---|
| 159 |
"Returns all comments from the keyword/value header pair in HEADER" |
|---|
| 160 |
(extract-header-comments (cdr header))) |
|---|
| 161 |
|
|---|
| 162 |
|
|---|
| 163 |
(defun extract-header-comments (header-value-string &optional comment-list) |
|---|
| 164 |
"Takes a header string and optional list of already extracted comments and |
|---|
| 165 |
returns all comments contained within that string" |
|---|
| 166 |
(if (register-groups-bind |
|---|
| 167 |
(comment rest) |
|---|
| 168 |
("\\((.*?)\\)(.*)" header-value-string) |
|---|
| 169 |
|
|---|
| 170 |
(setq header-value-string rest) |
|---|
| 171 |
(setq comment-list (cons comment comment-list))) |
|---|
| 172 |
|
|---|
| 173 |
(extract-header-comments header-value-string comment-list) |
|---|
| 174 |
(reverse comment-list))) |
|---|
| 175 |
|
|---|
| 176 |
|
|---|
| 177 |
(defun create-header (header-string) |
|---|
| 178 |
"Takes a header string and returns a keyword/value header pair" |
|---|
| 179 |
(register-groups-bind |
|---|
| 180 |
(header-name header-value) |
|---|
| 181 |
("^([^:\\s]+):\\s*(.*)$" header-string) |
|---|
| 182 |
|
|---|
| 183 |
(cons (ensure-keyword header-name) |
|---|
| 184 |
header-value))) |
|---|
| 185 |
|
|---|
| 186 |
|
|---|
| 187 |
(defun extract-parms (parm-string &optional parms) |
|---|
| 188 |
"Takes a string of parameters and returns a list of keyword/value |
|---|
| 189 |
parameter pairs" |
|---|
| 190 |
(if (register-groups-bind |
|---|
| 191 |
(parm-name parm-value rest) |
|---|
| 192 |
(";\\s*(.*?)=\"?([^;\"\\s]*)\"?[\\s]*(;?.*)" parm-string) |
|---|
| 193 |
|
|---|
| 194 |
(setq parm-string rest) |
|---|
| 195 |
(setq parms (cons (list (ensure-keyword parm-name) parm-value) |
|---|
| 196 |
parms))) |
|---|
| 197 |
|
|---|
| 198 |
(extract-parms parm-string parms) |
|---|
| 199 |
parms)) |
|---|
| 200 |
|
|---|
| 201 |
|
|---|
| 202 |
(defgeneric parse-body (body mime-type &optional boundary) |
|---|
| 203 |
(:documentation |
|---|
| 204 |
"Parses a mime body within the context of the mime type expected. |
|---|
| 205 |
Assumes the stream's position is already at the body. If it's not, |
|---|
| 206 |
you should call parse headers first or read through to the first null |
|---|
| 207 |
line.")) |
|---|
| 208 |
|
|---|
| 209 |
|
|---|
| 210 |
(defmethod parse-body ((body string) (mime-type (eql :mime)) |
|---|
| 211 |
&optional boundary) |
|---|
| 212 |
(declare (ignore boundary)) |
|---|
| 213 |
body) |
|---|
| 214 |
|
|---|
| 215 |
|
|---|
| 216 |
(defmethod parse-body ((body stream) (mime-type (eql :mime)) |
|---|
| 217 |
&optional boundary) |
|---|
| 218 |
(declare (ignore boundary)) |
|---|
| 219 |
(read-stream-to-string body line)) |
|---|
| 220 |
|
|---|
| 221 |
|
|---|
| 222 |
(defmethod parse-body ((body string) (mime-type (eql :text-mime)) |
|---|
| 223 |
&optional boundary) |
|---|
| 224 |
(declare (ignore boundary)) |
|---|
| 225 |
body) |
|---|
| 226 |
|
|---|
| 227 |
|
|---|
| 228 |
(defmethod parse-body ((body stream) (mime-type (eql :text-mime)) |
|---|
| 229 |
&optional boundary) |
|---|
| 230 |
(declare (ignore boundary)) |
|---|
| 231 |
(read-stream-to-string body line)) |
|---|
| 232 |
|
|---|
| 233 |
|
|---|
| 234 |
(defmethod parse-body ((body string) (mime-type (eql :multipart-mime)) |
|---|
| 235 |
&optional boundary) |
|---|
| 236 |
(parse-body (make-string-input-stream body) mime-type boundary)) |
|---|
| 237 |
|
|---|
| 238 |
|
|---|
| 239 |
(defmethod parse-body ((body stream) (mime-type (eql :multipart-mime)) |
|---|
| 240 |
&optional boundary) |
|---|
| 241 |
(multiple-value-bind (part end-type) (read-until-boundary body boundary) |
|---|
| 242 |
(cons (parse-mime part) |
|---|
| 243 |
(case end-type |
|---|
| 244 |
((end-part) (parse-body body mime-type boundary)) |
|---|
| 245 |
((end-mime) nil) |
|---|
| 246 |
((eof) (throw 'bad-mime nil)) |
|---|
| 247 |
(t (throw 'bad-mime "Unexpected Parse Return Value")))))) |
|---|
| 248 |
|
|---|
| 249 |
|
|---|
| 250 |
(defgeneric get-prologue (body boundary) |
|---|
| 251 |
(:documentation "Grab the prologue from a Multipart MIME message")) |
|---|
| 252 |
|
|---|
| 253 |
|
|---|
| 254 |
(defmethod get-prologue ((body string) boundary) |
|---|
| 255 |
(get-prologue (make-string-input-stream body) boundary)) |
|---|
| 256 |
|
|---|
| 257 |
|
|---|
| 258 |
(defmethod get-prologue ((body stream) boundary) |
|---|
| 259 |
(multiple-value-bind (text end-type) (read-until-boundary body boundary) |
|---|
| 260 |
(case end-type |
|---|
| 261 |
((end-part) text) |
|---|
| 262 |
((end-mime eof) (throw 'bad-mime nil)) |
|---|
| 263 |
(t (throw 'bad-mime "Unexpected Parse Return Value"))))) |
|---|
| 264 |
|
|---|
| 265 |
|
|---|
| 266 |
(defgeneric get-epilogue (body) |
|---|
| 267 |
(:documentation "Grab the prologue from a Multipart MIME message")) |
|---|
| 268 |
|
|---|
| 269 |
|
|---|
| 270 |
(defmethod get-epilogue ((body string)) |
|---|
| 271 |
(read-stream-to-string (make-string-input-stream body) line)) |
|---|
| 272 |
|
|---|
| 273 |
|
|---|
| 274 |
(defmethod get-epilogue ((body stream)) |
|---|
| 275 |
(read-stream-to-string body line)) |
|---|
| 276 |
|
|---|
| 277 |
|
|---|
| 278 |
(defun read-until-boundary (stream boundary) |
|---|
| 279 |
"Reads a MIME body from STREAM until it reaches a boundary defined by |
|---|
| 280 |
BOUNDARY" |
|---|
| 281 |
(let ((end-type 'eof) |
|---|
| 282 |
(actual-boundary (concatenate 'string "--" boundary))) |
|---|
| 283 |
(values |
|---|
| 284 |
(read-stream-to-string |
|---|
| 285 |
stream line |
|---|
| 286 |
|
|---|
| 287 |
(or (and (equal (delete #\return line) actual-boundary) |
|---|
| 288 |
(setq end-type 'end-part)) |
|---|
| 289 |
(and (equal (delete #\return line) (concatenate 'string |
|---|
| 290 |
actual-boundary |
|---|
| 291 |
"--")) |
|---|
| 292 |
(setq end-type 'end-mime)))) |
|---|
| 293 |
end-type))) |
|---|
| 294 |
|
|---|
| 295 |
|
|---|
| 296 |
(defparameter *mime-types-file* |
|---|
| 297 |
(make-pathname :directory '(:absolute "etc") |
|---|
| 298 |
:name "mime" |
|---|
| 299 |
:type "types")) |
|---|
| 300 |
|
|---|
| 301 |
|
|---|
| 302 |
(defun lookup-mime (pathname &optional mime-types-file) |
|---|
| 303 |
"Takes a PATHNAME argument and uses MIME-TYPES-FILE (or the system |
|---|
| 304 |
default) to determine the mime type of PATHNAME. Returns two values: |
|---|
| 305 |
the content type and the the content subtype" |
|---|
| 306 |
(let ((extension (pathname-type pathname))) |
|---|
| 307 |
(with-open-file |
|---|
| 308 |
(mime (or mime-types-file *mime-types-file*) :direction :input) |
|---|
| 309 |
(read-lines |
|---|
| 310 |
(line mime) |
|---|
| 311 |
((register-groups-bind |
|---|
| 312 |
(extensions) |
|---|
| 313 |
("^[^#\\s]+\\s+([^#]+)" line) |
|---|
| 314 |
(find extension (split "\\s+" extensions) |
|---|
| 315 |
:test #'string-equal)) |
|---|
| 316 |
(if (eq line 'eof) |
|---|
| 317 |
(values "application" "octet-stream") |
|---|
| 318 |
(register-groups-bind |
|---|
| 319 |
(content-type content-subtype) |
|---|
| 320 |
("^([^\/]+)\/([^\\s]+)" line) |
|---|
| 321 |
(values (or content-type "application") |
|---|
| 322 |
(or content-subtype "octet-stream"))))))))) |
|---|