| 1 |
;;; cl-pdf copyright 2002-2005 Marc Battyani see license.txt for the details |
|---|
| 2 |
;;; You can reach me at marc.battyani@fractalconcept.com or marc@battyani.net |
|---|
| 3 |
;;; The homepage of cl-pdf is here: http://www.fractalconcept.com/asp/html/cl-pdf.html |
|---|
| 4 |
|
|---|
| 5 |
;; Thanks to Arthur Lemmens for the recursive descent parser :) |
|---|
| 6 |
;; (It's much nicer than my original LALR parser ;-) |
|---|
| 7 |
;; See the example at the end for usage. |
|---|
| 8 |
|
|---|
| 9 |
(in-package #:pdf) |
|---|
| 10 |
|
|---|
| 11 |
(defvar *pdf-input-stream* nil) |
|---|
| 12 |
(defvar *indirect-objects* nil) |
|---|
| 13 |
|
|---|
| 14 |
(defun find-indirect-object (obj-number gen-number) |
|---|
| 15 |
(let ((object.pos (gethash (cons obj-number gen-number) *indirect-objects*))) |
|---|
| 16 |
(if object.pos |
|---|
| 17 |
(values (car object.pos) (cdr object.pos)) |
|---|
| 18 |
(make-indirect-object obj-number gen-number 0)))) |
|---|
| 19 |
|
|---|
| 20 |
(defun make-indirect-object (obj-number gen-number position) |
|---|
| 21 |
(let ((object (or (car (gethash (cons obj-number gen-number) *indirect-objects*)) |
|---|
| 22 |
(make-instance 'indirect-object |
|---|
| 23 |
:obj-number obj-number |
|---|
| 24 |
:gen-number gen-number |
|---|
| 25 |
:content :unread |
|---|
| 26 |
:no-link t)))) |
|---|
| 27 |
(setf (gethash (cons obj-number gen-number) *indirect-objects*) (cons object position)) |
|---|
| 28 |
object)) |
|---|
| 29 |
|
|---|
| 30 |
(defun read-indirect-object (obj-number gen-number) |
|---|
| 31 |
(multiple-value-bind (object position) (find-indirect-object obj-number gen-number) |
|---|
| 32 |
(when (eq (content object) :unread) |
|---|
| 33 |
(setf (content object) (read-indirect-object-content position))) |
|---|
| 34 |
object)) |
|---|
| 35 |
|
|---|
| 36 |
(defun load-indirect-object (object) |
|---|
| 37 |
(when (eq (content object) :unread) |
|---|
| 38 |
(read-indirect-object (obj-number object) (gen-number object))) |
|---|
| 39 |
object) |
|---|
| 40 |
|
|---|
| 41 |
(defun delete-indirect-object (object) |
|---|
| 42 |
(remhash (cons (obj-number object) (gen-number object)) *indirect-objects*)) |
|---|
| 43 |
|
|---|
| 44 |
(defun load-all-indirect-objects () |
|---|
| 45 |
(loop for object.pos being the hash-value of *indirect-objects* |
|---|
| 46 |
do (load-indirect-object (car object.pos)))) |
|---|
| 47 |
|
|---|
| 48 |
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
|---|
| 49 |
;;; Parser |
|---|
| 50 |
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
|---|
| 51 |
|
|---|
| 52 |
(define-condition pdf-parse-error () |
|---|
| 53 |
((stream :initarg :stream :reader pdf-parse-error-stream) |
|---|
| 54 |
(message :initarg :message :reader pdf-parse-error-message)) |
|---|
| 55 |
(:report (lambda (condition stream) |
|---|
| 56 |
(format stream "~&Error at position ~D while reading PDF document.~%~A~%~A~%" (file-position (pdf-parse-error-stream condition)) (pdf-parse-error-message condition) |
|---|
| 57 |
(pdf-parse-error-stream condition))))) |
|---|
| 58 |
|
|---|
| 59 |
(defun unexpected-character-error (char) |
|---|
| 60 |
(cerror "Ignore the character and continue." |
|---|
| 61 |
'pdf-parse-error |
|---|
| 62 |
:stream *pdf-input-stream* |
|---|
| 63 |
:message (format nil "Unexpected character ~S at ~A." char |
|---|
| 64 |
(file-position *pdf-input-stream*)))) |
|---|
| 65 |
|
|---|
| 66 |
(defvar +white-char+ (coerce '(#\Space #\Newline #\Return #\Tab #\Null #\Page) 'string)) |
|---|
| 67 |
|
|---|
| 68 |
(defun white-char-p (c) |
|---|
| 69 |
(find c +white-char+)) |
|---|
| 70 |
|
|---|
| 71 |
(defun octal-digit-char-p (char) |
|---|
| 72 |
(find char "01234567")) |
|---|
| 73 |
|
|---|
| 74 |
(defun hex-digit-char-p (char) |
|---|
| 75 |
(find char "0123456789ABCDEF")) |
|---|
| 76 |
|
|---|
| 77 |
(defun numeric-char-p (char) |
|---|
| 78 |
(find char "0123456789-.")) |
|---|
| 79 |
|
|---|
| 80 |
(defun name-char-p (char) |
|---|
| 81 |
(and (<= #x21 (char-code char) #x7E) |
|---|
| 82 |
(not (find char "%()<>[]{}/#")))) |
|---|
| 83 |
|
|---|
| 84 |
;;; Skipping characters |
|---|
| 85 |
|
|---|
| 86 |
(defun eat-char (expected-char) |
|---|
| 87 |
(let ((char (read-char *pdf-input-stream*))) |
|---|
| 88 |
(unless (char= char expected-char) |
|---|
| 89 |
(cerror "Ignore the character and continue." |
|---|
| 90 |
'pdf-parse-error |
|---|
| 91 |
:stream *pdf-input-stream* |
|---|
| 92 |
:message (format nil "Unexpected character ~S (expected ~S) at ~A." |
|---|
| 93 |
char expected-char (file-position *pdf-input-stream*)))))) |
|---|
| 94 |
|
|---|
| 95 |
(defun eat-chars (chars) |
|---|
| 96 |
(loop for char across chars |
|---|
| 97 |
do (eat-char char))) |
|---|
| 98 |
|
|---|
| 99 |
(defun eat-keyword (keyword) |
|---|
| 100 |
(skip-whitespace t) |
|---|
| 101 |
(eat-chars keyword) |
|---|
| 102 |
(skip-whitespace t)) |
|---|
| 103 |
|
|---|
| 104 |
(defun skip-whitespace (eof-error-p &key (skip-comments t)) |
|---|
| 105 |
(loop |
|---|
| 106 |
(let ((char (read-char *pdf-input-stream* eof-error-p))) |
|---|
| 107 |
(cond ((null char) |
|---|
| 108 |
(return :eof)) |
|---|
| 109 |
((and skip-comments (eql char #\%)) |
|---|
| 110 |
(loop for c = (read-char *pdf-input-stream* nil) |
|---|
| 111 |
until (or (null c) (eq c #\Newline)))) |
|---|
| 112 |
((not (white-char-p char)) |
|---|
| 113 |
(unread-char char *pdf-input-stream*) |
|---|
| 114 |
(return char)))))) |
|---|
| 115 |
|
|---|
| 116 |
;;; PDF Basic objects (see Chapter 4) |
|---|
| 117 |
|
|---|
| 118 |
(defun read-object (&optional (eof-error-p t)) |
|---|
| 119 |
"Returns one of the following PDF objects: boolean (:true or :false), |
|---|
| 120 |
number (Lisp number), string (Lisp string), name (Lisp symbol in the PDF |
|---|
| 121 |
package), array (Lisp vector), dictionary (Lisp property list), stream |
|---|
| 122 |
(Lisp pdf-stream) or null (Lisp NIL). When EOF-ERRORP is nil, it returns |
|---|
| 123 |
:eof for the end of the stream (otherwise it signals an error)." |
|---|
| 124 |
(skip-whitespace eof-error-p) |
|---|
| 125 |
(let ((char (peek-char nil *pdf-input-stream* eof-error-p))) |
|---|
| 126 |
(cond ((numeric-char-p char) |
|---|
| 127 |
(read-number)) |
|---|
| 128 |
((eql char #\() |
|---|
| 129 |
(eat-char #\() |
|---|
| 130 |
(read-pdf-string)) |
|---|
| 131 |
((eql char #\/) |
|---|
| 132 |
(eat-char #\/) |
|---|
| 133 |
(read-name )) |
|---|
| 134 |
((eql char #\[) |
|---|
| 135 |
(eat-char #\[) |
|---|
| 136 |
(read-array)) |
|---|
| 137 |
((eql char #\<) |
|---|
| 138 |
(eat-char #\<) |
|---|
| 139 |
(let ((next-char (peek-char nil *pdf-input-stream*))) |
|---|
| 140 |
(if (char= next-char #\<) |
|---|
| 141 |
(progn (eat-char #\<) |
|---|
| 142 |
(read-dictionary-or-stream)) |
|---|
| 143 |
(read-hex-string)))) |
|---|
| 144 |
((eql char #\t) |
|---|
| 145 |
(eat-chars "true") |
|---|
| 146 |
"true") |
|---|
| 147 |
((eql char #\f) |
|---|
| 148 |
(eat-chars "false") |
|---|
| 149 |
"false") |
|---|
| 150 |
((eql char #\n) |
|---|
| 151 |
(eat-chars "null") |
|---|
| 152 |
nil) |
|---|
| 153 |
((eql char #\e) |
|---|
| 154 |
;; this is probably an empty indirect object. WRITE-OBJECT |
|---|
| 155 |
;; can write them, so we should be able to read them too. |
|---|
| 156 |
nil) |
|---|
| 157 |
(t (unexpected-character-error char))))) |
|---|
| 158 |
|
|---|
| 159 |
(defun read-number () |
|---|
| 160 |
(read-from-string |
|---|
| 161 |
(with-output-to-string (s) |
|---|
| 162 |
(loop for char = (read-char *pdf-input-stream* nil nil) |
|---|
| 163 |
until (or (null char) (not (numeric-char-p char))) |
|---|
| 164 |
do (write-char char s) |
|---|
| 165 |
finally (when char |
|---|
| 166 |
(unread-char char *pdf-input-stream*)))))) |
|---|
| 167 |
|
|---|
| 168 |
;;; Strings |
|---|
| 169 |
|
|---|
| 170 |
(defun read-pdf-string () |
|---|
| 171 |
;; TODO: Deal with encodings. |
|---|
| 172 |
(with-output-to-string (s) |
|---|
| 173 |
(write-char #\( s) |
|---|
| 174 |
(loop for prev-char = #\( then char |
|---|
| 175 |
for char = (read-char *pdf-input-stream*) |
|---|
| 176 |
until (and (char= char #\))(not (char= prev-char #\\))) |
|---|
| 177 |
do (write-char char s)) |
|---|
| 178 |
(write-char #\) s))) |
|---|
| 179 |
|
|---|
| 180 |
(defun read-hex-string () |
|---|
| 181 |
(with-output-to-string (s) |
|---|
| 182 |
(write-char #\< s) |
|---|
| 183 |
(loop for char = (read-char *pdf-input-stream*) |
|---|
| 184 |
until (char= char #\>) |
|---|
| 185 |
do (write-char char s)) |
|---|
| 186 |
(write-char #\> s))) |
|---|
| 187 |
|
|---|
| 188 |
(defun parse-hex (digit-1 digit-2) |
|---|
| 189 |
(flet ((parse-digit (digit) |
|---|
| 190 |
(- (char-code digit) |
|---|
| 191 |
(if (char<= #\0 digit #\9) #.(char-code #\0) #.(- (char-code #\A) 10))))) |
|---|
| 192 |
(+ (* 16 (parse-digit (char-upcase digit-1))) |
|---|
| 193 |
(parse-digit (char-upcase digit-2))))) |
|---|
| 194 |
|
|---|
| 195 |
;;; Names |
|---|
| 196 |
|
|---|
| 197 |
(defun read-name () |
|---|
| 198 |
(with-output-to-string (s) |
|---|
| 199 |
(write-char #\/ s) |
|---|
| 200 |
(loop (let ((char (read-char *pdf-input-stream* nil nil))) |
|---|
| 201 |
(cond ((null char) (return)) |
|---|
| 202 |
((eql char #\#) |
|---|
| 203 |
(let ((digit-1 (read-char *pdf-input-stream*)) |
|---|
| 204 |
(digit-2 (read-char *pdf-input-stream*))) |
|---|
| 205 |
(unless (and (hex-digit-char-p digit-1) |
|---|
| 206 |
(hex-digit-char-p digit-2)) |
|---|
| 207 |
(error 'pdf-parse-error |
|---|
| 208 |
:stream *pdf-input-stream* |
|---|
| 209 |
:message "Illegal hexadecimal escape sequence in PDF name.")) |
|---|
| 210 |
(write-char (code-char (parse-hex digit-1 digit-2)) |
|---|
| 211 |
s))) |
|---|
| 212 |
((name-char-p char) |
|---|
| 213 |
(write-char char s)) |
|---|
| 214 |
(t (unread-char char *pdf-input-stream*) |
|---|
| 215 |
(return))))))) |
|---|
| 216 |
|
|---|
| 217 |
;;; Arrays |
|---|
| 218 |
|
|---|
| 219 |
(defun read-array () |
|---|
| 220 |
;; #\[ has already been read |
|---|
| 221 |
(let ((stack '())) |
|---|
| 222 |
(loop (skip-whitespace t) |
|---|
| 223 |
(let ((char (peek-char nil *pdf-input-stream*))) |
|---|
| 224 |
(case char |
|---|
| 225 |
( #\] |
|---|
| 226 |
(eat-char #\]) |
|---|
| 227 |
(return)) |
|---|
| 228 |
( #\R |
|---|
| 229 |
(eat-char #\R) |
|---|
| 230 |
;; Reduce last two numbers to indirect-reference |
|---|
| 231 |
(let ((generation-number (pop stack)) |
|---|
| 232 |
(object-number (pop stack))) |
|---|
| 233 |
(assert (integerp generation-number)) |
|---|
| 234 |
(assert (integerp object-number)) |
|---|
| 235 |
(push (find-indirect-object object-number generation-number) stack))) |
|---|
| 236 |
(otherwise |
|---|
| 237 |
(push (read-object) stack))))) |
|---|
| 238 |
(make-array (length stack) |
|---|
| 239 |
:initial-contents (nreverse stack)))) |
|---|
| 240 |
|
|---|
| 241 |
;;; Dictionaries and streams |
|---|
| 242 |
|
|---|
| 243 |
(defun read-dictionary-properties () |
|---|
| 244 |
(let ((plist '())) |
|---|
| 245 |
(loop (skip-whitespace t) |
|---|
| 246 |
(let ((char (peek-char nil *pdf-input-stream*))) |
|---|
| 247 |
(case char |
|---|
| 248 |
( #\> |
|---|
| 249 |
(eat-chars ">>") |
|---|
| 250 |
(return)) |
|---|
| 251 |
( #\R |
|---|
| 252 |
(eat-char #\R) |
|---|
| 253 |
;; Reduce last two numbers to indirect-object |
|---|
| 254 |
(let ((generation-number (pop plist)) |
|---|
| 255 |
(object-number (pop plist))) |
|---|
| 256 |
(assert (integerp generation-number)) |
|---|
| 257 |
(assert (integerp object-number)) |
|---|
| 258 |
(push (find-indirect-object object-number generation-number) plist))) |
|---|
| 259 |
(otherwise |
|---|
| 260 |
(push (read-object t) |
|---|
| 261 |
plist))))) |
|---|
| 262 |
(loop for (k v . rest) on (nreverse plist) by #'cddr |
|---|
| 263 |
collect (cons k v)))) |
|---|
| 264 |
|
|---|
| 265 |
(defun read-dictionary () |
|---|
| 266 |
(make-instance 'dictionary :dict-values (read-dictionary-properties))) |
|---|
| 267 |
|
|---|
| 268 |
(defun read-dictionary-or-stream () |
|---|
| 269 |
(let ((properties (read-dictionary-properties))) |
|---|
| 270 |
;; Check if dictionary is followed by a stream |
|---|
| 271 |
(skip-whitespace nil) |
|---|
| 272 |
(let ((char (peek-char nil *pdf-input-stream* nil nil))) |
|---|
| 273 |
(cond ((eql char #\s) |
|---|
| 274 |
;; Don't use EAT-KEYWORD here, because it may eat too many newlines! |
|---|
| 275 |
(eat-chars "stream") |
|---|
| 276 |
(read-line *pdf-input-stream*) |
|---|
| 277 |
(read-pdf-stream properties)) |
|---|
| 278 |
(t (make-instance 'dictionary :dict-values properties)))))) |
|---|
| 279 |
|
|---|
| 280 |
(defun read-pdf-stream (properties) |
|---|
| 281 |
(let ((length (cdr (assoc "/Length" properties :test #'string=)))) |
|---|
| 282 |
(when (typep length 'indirect-object) |
|---|
| 283 |
(let ((saved-filepos (file-position *pdf-input-stream*))) |
|---|
| 284 |
(setq length (content (load-indirect-object length))) |
|---|
| 285 |
(file-position *pdf-input-stream* saved-filepos))) |
|---|
| 286 |
(assert (integerp length)) |
|---|
| 287 |
(let ((content (loop repeat (ceiling length array-total-size-limit) |
|---|
| 288 |
for buffer-size = (min length array-total-size-limit) |
|---|
| 289 |
;; while (plusp buffer-size) |
|---|
| 290 |
collect (let* ((buffer (make-string buffer-size)) |
|---|
| 291 |
(bytes-read (read-sequence buffer *pdf-input-stream*))) |
|---|
| 292 |
(when (< bytes-read buffer-size) |
|---|
| 293 |
(error 'pdf-parse-error |
|---|
| 294 |
:stream *pdf-input-stream* |
|---|
| 295 |
:message "Unexpected end of PDF-stream.")) |
|---|
| 296 |
buffer) |
|---|
| 297 |
do (decf length buffer-size)))) |
|---|
| 298 |
(eat-keyword "endstream") |
|---|
| 299 |
(make-instance 'pdf-stream :dict-values properties :content content :no-compression t)))) |
|---|
| 300 |
|
|---|
| 301 |
;;; Integers |
|---|
| 302 |
|
|---|
| 303 |
(defun read-integer () |
|---|
| 304 |
(let ((object (read-object))) |
|---|
| 305 |
(unless (integerp object) |
|---|
| 306 |
(error 'pdf-parse-error |
|---|
| 307 |
:stream *pdf-input-stream* |
|---|
| 308 |
:message "Integer expected.")) |
|---|
| 309 |
object)) |
|---|
| 310 |
|
|---|
| 311 |
;;; Indirect objects |
|---|
| 312 |
|
|---|
| 313 |
(defun read-indirect-object-content (file-position) |
|---|
| 314 |
(file-position *pdf-input-stream* file-position) |
|---|
| 315 |
(let* ((object-number (read-integer)) |
|---|
| 316 |
(generation-number (read-integer))) |
|---|
| 317 |
(eat-keyword "obj") |
|---|
| 318 |
(let ((object (read-object))) |
|---|
| 319 |
;; Some producers forget the "endobj" at the end of a stream |
|---|
| 320 |
;; Let's try to be tolerant. |
|---|
| 321 |
(skip-whitespace t) |
|---|
| 322 |
(when (char= #\e (peek-char nil *pdf-input-stream*)) |
|---|
| 323 |
(eat-keyword "endobj")) |
|---|
| 324 |
object))) |
|---|
| 325 |
|
|---|
| 326 |
;;; xref |
|---|
| 327 |
|
|---|
| 328 |
(defun read-xref-and-trailer (position) |
|---|
| 329 |
(let (first-trailer) |
|---|
| 330 |
(loop |
|---|
| 331 |
(read-cross-reference-subsections position) |
|---|
| 332 |
(let* ((trailer (read-trailer))) |
|---|
| 333 |
(unless first-trailer (setf first-trailer trailer)) |
|---|
| 334 |
(let ((prev-position (get-dict-value trailer "/Prev"))) |
|---|
| 335 |
(if prev-position |
|---|
| 336 |
(setq position prev-position) |
|---|
| 337 |
(return first-trailer))))))) |
|---|
| 338 |
|
|---|
| 339 |
(defun read-cross-reference-subsections (position) |
|---|
| 340 |
(file-position *pdf-input-stream* position) |
|---|
| 341 |
(eat-keyword "xref") |
|---|
| 342 |
(loop |
|---|
| 343 |
;; the eat-keyword skips whitespace the first time, but there can |
|---|
| 344 |
;; be more whitespace the second time around. |
|---|
| 345 |
(skip-whitespace t) |
|---|
| 346 |
(let ((char (peek-char nil *pdf-input-stream*))) |
|---|
| 347 |
(cond ((char= char #\t) (return)) |
|---|
| 348 |
(t (read-cross-reference-subsection)))))) |
|---|
| 349 |
|
|---|
| 350 |
(defun read-cross-reference-subsection () |
|---|
| 351 |
(let ((first-entry (read-integer)) |
|---|
| 352 |
(nr-entries (read-integer))) |
|---|
| 353 |
(loop repeat nr-entries |
|---|
| 354 |
for number from first-entry |
|---|
| 355 |
do (read-cross-reference-entry number)))) |
|---|
| 356 |
|
|---|
| 357 |
(defun read-cross-reference-entry (number) |
|---|
| 358 |
(let ((position (read-integer))) |
|---|
| 359 |
(skip-whitespace nil) |
|---|
| 360 |
(let ((gen (read-integer))) |
|---|
| 361 |
(skip-whitespace nil) |
|---|
| 362 |
(let ((type (read-char *pdf-input-stream*))) |
|---|
| 363 |
(skip-whitespace nil) |
|---|
| 364 |
(if (char= type #\n) |
|---|
| 365 |
(make-indirect-object number gen position)))))) |
|---|
| 366 |
|
|---|
| 367 |
(defun read-trailer () |
|---|
| 368 |
(eat-keyword "trailer") |
|---|
| 369 |
(skip-whitespace t) |
|---|
| 370 |
(eat-chars "<<") |
|---|
| 371 |
(read-dictionary)) |
|---|
| 372 |
|
|---|
| 373 |
(defconstant +xref-search-size+ 1024 |
|---|
| 374 |
"Read this many bytes at end of file to look for 'startxref'") |
|---|
| 375 |
|
|---|
| 376 |
(defun find-cross-reference-start () |
|---|
| 377 |
(let ((file-length (file-length *pdf-input-stream*)) |
|---|
| 378 |
(buffer (make-string +xref-search-size+))) |
|---|
| 379 |
(file-position *pdf-input-stream* (- file-length +xref-search-size+)) |
|---|
| 380 |
(read-sequence buffer *pdf-input-stream*) |
|---|
| 381 |
(let ((position (search "startxref" buffer))) |
|---|
| 382 |
(unless position |
|---|
| 383 |
(error 'pdf-parse-error :stream *pdf-input-stream* |
|---|
| 384 |
:message "Can't find start address of cross-reference table.")) |
|---|
| 385 |
(parse-integer buffer :start (+ position 10) :junk-allowed t)))) |
|---|
| 386 |
|
|---|
| 387 |
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
|---|
| 388 |
;;; Higher level Stuff |
|---|
| 389 |
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
|---|
| 390 |
|
|---|
| 391 |
(defun renumber-all-indirect-objects () |
|---|
| 392 |
"Gives all indirect objects a consecutive numbering. |
|---|
| 393 |
Returns the first unused object-number." |
|---|
| 394 |
(let ((objects (make-array (hash-table-count *indirect-objects*) |
|---|
| 395 |
:fill-pointer 0 :adjustable t |
|---|
| 396 |
:initial-element nil))) |
|---|
| 397 |
(setf (objects *document*) objects) |
|---|
| 398 |
(setf (last-object-number *document*) (hash-table-count *indirect-objects*)) |
|---|
| 399 |
(loop for object.pos being the hash-value of *indirect-objects* |
|---|
| 400 |
for object = (car object.pos) |
|---|
| 401 |
for number from 1 do |
|---|
| 402 |
(setf (obj-number object) number |
|---|
| 403 |
(gen-number object) 0) |
|---|
| 404 |
(vector-push-extend object objects)))) |
|---|
| 405 |
|
|---|
| 406 |
(defun read-pdf-file (file) |
|---|
| 407 |
(let ((*indirect-objects* (make-hash-table :test #'equal)) |
|---|
| 408 |
(*document* (make-instance 'document :empty t))) |
|---|
| 409 |
(with-open-file (*pdf-input-stream* file |
|---|
| 410 |
:direction :input |
|---|
| 411 |
:external-format +external-format+) |
|---|
| 412 |
(read-pdf)) |
|---|
| 413 |
*document*)) |
|---|
| 414 |
|
|---|
| 415 |
(defun collect-pages (page pages-vector root-page-node) |
|---|
| 416 |
(if (string= (get-dict-value (content page) "/Type") "/Pages") |
|---|
| 417 |
(progn |
|---|
| 418 |
(loop for kid-page across (get-dict-value (content page) "/Kids") |
|---|
| 419 |
do (collect-pages kid-page pages-vector root-page-node)) |
|---|
| 420 |
(unless (eq page root-page-node) (delete-indirect-object page))) |
|---|
| 421 |
(progn |
|---|
| 422 |
(unless (eq page root-page-node) |
|---|
| 423 |
(change-dict-value (content page) "/Parent" root-page-node)) |
|---|
| 424 |
(vector-push-extend page pages-vector)))) |
|---|
| 425 |
|
|---|
| 426 |
(defun read-pdf () |
|---|
| 427 |
(let* ((trailer (read-xref-and-trailer (find-cross-reference-start))) |
|---|
| 428 |
(%visited-object% (make-hash-table :test #'eql))) |
|---|
| 429 |
(setf (catalog *document*) (get-dict-value trailer "/Root") |
|---|
| 430 |
(docinfo *document*) (get-dict-value trailer "/Info")) |
|---|
| 431 |
(load-all-indirect-objects) |
|---|
| 432 |
(let* ((root-page-node (change-class (get-dict-value (content (catalog *document*)) "/Pages") 'page-node)) |
|---|
| 433 |
(pages-vector (make-array 1 :fill-pointer 0 :adjustable t))) |
|---|
| 434 |
(collect-pages root-page-node pages-vector root-page-node) |
|---|
| 435 |
(setf (pages root-page-node) pages-vector) |
|---|
| 436 |
(change-dict-value (content root-page-node) "/Count" #'(lambda () (length (pages root-page-node)))) |
|---|
| 437 |
(change-dict-value (content root-page-node) "/Kids" (pages root-page-node)) |
|---|
| 438 |
(renumber-all-indirect-objects) |
|---|
| 439 |
(setf (root-page *document*) root-page-node)))) |
|---|
| 440 |
|
|---|
| 441 |
(defvar *original-content* nil) |
|---|
| 442 |
(defvar *current-content* nil) |
|---|
| 443 |
|
|---|
| 444 |
(defun insert-original-page-content () |
|---|
| 445 |
;; save graphics state |
|---|
| 446 |
(write-line " q" *page-stream*) |
|---|
| 447 |
(vector-push-extend (make-instance 'indirect-object :content |
|---|
| 448 |
(make-instance 'pdf-stream :content |
|---|
| 449 |
(get-output-stream-string *page-stream*))) |
|---|
| 450 |
*current-content*) |
|---|
| 451 |
(if (vectorp *original-content*) |
|---|
| 452 |
(loop for content across *original-content* do |
|---|
| 453 |
(vector-push-extend content *current-content*)) |
|---|
| 454 |
(vector-push-extend *original-content* *current-content*)) |
|---|
| 455 |
(setf *page-stream* (make-string-output-stream)) |
|---|
| 456 |
;; restore graphics state |
|---|
| 457 |
(write-line " Q" *page-stream*)) |
|---|
| 458 |
|
|---|
| 459 |
(export 'insert-original-page-content) |
|---|
| 460 |
|
|---|
| 461 |
(defun ensure-dictionary (obj) |
|---|
| 462 |
(if (typep obj 'indirect-object) |
|---|
| 463 |
(content obj) |
|---|
| 464 |
obj)) |
|---|
| 465 |
|
|---|
| 466 |
(defun open-page (page-num) |
|---|
| 467 |
(let* ((page (aref (pages *root-page*) page-num)) |
|---|
| 468 |
(dict (content page)) |
|---|
| 469 |
(resources (ensure-dictionary (get-dict-value dict "/Resources")))) |
|---|
| 470 |
(let ((fonts (ensure-dictionary (get-dict-value resources "/Font"))) |
|---|
| 471 |
(xobjects (ensure-dictionary (get-dict-value resources "/XObject"))) |
|---|
| 472 |
(content-stream (make-instance 'pdf-stream))) |
|---|
| 473 |
(setf *original-content* (get-dict-value dict "/Contents")) |
|---|
| 474 |
(setf *current-content* (make-array 10 :fill-pointer 0 :adjustable t )) |
|---|
| 475 |
(change-class page 'page) |
|---|
| 476 |
(unless resources |
|---|
| 477 |
(setf resources (make-instance 'dictionary))) |
|---|
| 478 |
(change-dict-value dict "/Resources" resources) |
|---|
| 479 |
(unless fonts |
|---|
| 480 |
(setf fonts (make-instance 'dictionary))) |
|---|
| 481 |
(change-dict-value resources "/Font" fonts) |
|---|
| 482 |
(unless xobjects |
|---|
| 483 |
(setf xobjects (make-instance 'dictionary))) |
|---|
| 484 |
(change-dict-value resources "/XObject" xobjects) |
|---|
| 485 |
(setf (bounds page)(get-dict-value dict "/MediaBox") |
|---|
| 486 |
(resources page) resources |
|---|
| 487 |
(font-objects page) fonts |
|---|
| 488 |
(xobjects page) xobjects |
|---|
| 489 |
(content-stream page) content-stream) |
|---|
| 490 |
(change-dict-value dict "/Contents" *current-content*)) |
|---|
| 491 |
page)) |
|---|
| 492 |
|
|---|
| 493 |
(defmacro with-existing-document ((file &key (creator "") author title subject keywords) &body body) |
|---|
| 494 |
`(let* ((*document* (read-pdf-file ,file)) |
|---|
| 495 |
(*root-page* (root-page *document*)) |
|---|
| 496 |
(*outlines-stack* (list (outline-root *document*))) |
|---|
| 497 |
(*page* nil) |
|---|
| 498 |
(*page-number* 0)) |
|---|
| 499 |
(add-doc-info *document* :creator ,creator :author ,author |
|---|
| 500 |
:title ,title :subject ,subject :keywords ,keywords) |
|---|
| 501 |
,@body)) |
|---|
| 502 |
|
|---|
| 503 |
(export 'with-existing-document) |
|---|
| 504 |
|
|---|
| 505 |
(defmacro with-existing-page ((page-number) &body body) |
|---|
| 506 |
`(let* ((*original-content* nil) |
|---|
| 507 |
(*current-content* nil) |
|---|
| 508 |
(*page* (open-page ,page-number))) |
|---|
| 509 |
(setf (content (content-stream *page*)) |
|---|
| 510 |
(let ((*page-stream* (make-string-output-stream))) |
|---|
| 511 |
,@body |
|---|
| 512 |
(vector-push-extend |
|---|
| 513 |
(make-instance 'indirect-object :content |
|---|
| 514 |
(make-instance 'pdf-stream :content |
|---|
| 515 |
(get-output-stream-string pdf::*page-stream*))) |
|---|
| 516 |
*current-content*))))) |
|---|
| 517 |
|
|---|
| 518 |
(export 'with-existing-page) |
|---|
| 519 |
|
|---|
| 520 |
#| |
|---|
| 521 |
|
|---|
| 522 |
(pdf:with-existing-document (#P"/tmp/MS-32.pdf") |
|---|
| 523 |
(pdf:with-existing-page (0) |
|---|
| 524 |
(let ((helvetica (pdf:get-font "Helvetica"))) |
|---|
| 525 |
(pdf:in-text-mode |
|---|
| 526 |
(pdf:set-font helvetica 45.0) |
|---|
| 527 |
(pdf:move-text 95 700) |
|---|
| 528 |
(pdf:draw-text "cl-pdf-parser example")) |
|---|
| 529 |
(pdf:with-saved-state |
|---|
| 530 |
(pdf:translate 200 60) |
|---|
| 531 |
(pdf:scale 0.7 0.7) |
|---|
| 532 |
(pdf:rotate 15) |
|---|
| 533 |
(pdf:insert-original-page-content)) |
|---|
| 534 |
(pdf:translate 230 400) |
|---|
| 535 |
(loop repeat 170 |
|---|
| 536 |
for i = 0.67 then (* i 1.03) |
|---|
| 537 |
do (pdf:in-text-mode |
|---|
| 538 |
(pdf:set-font helvetica i) |
|---|
| 539 |
(pdf:set-rgb-fill (/ (random 255) 255.0)(/ (random 255) 255.0)(/ (random 255) 255.0)) |
|---|
| 540 |
(pdf:move-text (* i 3) 0) |
|---|
| 541 |
(pdf:draw-text "cl-pdf")) |
|---|
| 542 |
(pdf:rotate 13)))) |
|---|
| 543 |
(pdf:with-existing-page (1) |
|---|
| 544 |
(let ((helvetica (pdf:get-font "Helvetica"))) |
|---|
| 545 |
(pdf:insert-original-page-content) |
|---|
| 546 |
(pdf:in-text-mode |
|---|
| 547 |
(pdf:set-font helvetica 60.0) |
|---|
| 548 |
(pdf:move-text 250 250) |
|---|
| 549 |
(pdf:rotate 30) |
|---|
| 550 |
(pdf:set-rgb-fill 1.0 0.0 0.0) |
|---|
| 551 |
(pdf:draw-text "cl-pdf-parser example")))) |
|---|
| 552 |
(pdf:with-page () ;add a new page |
|---|
| 553 |
(let ((helvetica (pdf:get-font "Helvetica"))) |
|---|
| 554 |
(pdf:in-text-mode |
|---|
| 555 |
(pdf:set-font helvetica 36.0) |
|---|
| 556 |
(pdf:move-text 100 800) |
|---|
| 557 |
(pdf:draw-text "cl-pdf: Example 1")) |
|---|
| 558 |
(pdf:translate 230 500) |
|---|
| 559 |
(loop repeat 150 |
|---|
| 560 |
for i = 0.67 then (* i 1.045) |
|---|
| 561 |
do (pdf:in-text-mode |
|---|
| 562 |
(pdf:set-font helvetica i) |
|---|
| 563 |
(pdf:set-rgb-fill (/ (random 255) 255.0)(/ (random 255) 255.0)(/ (random 255) 255.0)) |
|---|
| 564 |
(pdf:move-text (* i 3) 0) |
|---|
| 565 |
(pdf:draw-text "cl-typesetting")) |
|---|
| 566 |
(pdf:rotate 13)))) |
|---|
| 567 |
(pdf:write-document #P"/tmp/t.pdf")) |
|---|
| 568 |
|
|---|
| 569 |
|# |
|---|