root/trunk/bknr/datastore/src/data/encoding.lisp

Revision 3950, 18.0 kB (checked in by hans, 2 months ago)

Make it possible to restore datastores when packages have been deleted
which are referenced by objects in the store.

  • Property svn:eol-style set to native
  • Property svn:keywords set to author date id revision
Line 
1 ;;;; Reading and writing Lisp objects in a binary format.
2
3 ;;; Design:
4 ;;;
5 ;;;   - compact storage requirements
6 ;;;   - no arbitary limits (e.g. integers may be arbitarily large)
7 ;;;   - high read and write performance, thus no checking for cyclic data
8
9 ;;; For every supported data type, a character is defined as tag denoting
10 ;;; the type when reading.
11 ;;;
12 ;;; The functions ENCODE and DECODE encode and decode an arbitary object.
13 ;;; Upon write, ENCODE determines the data type from the lisp data type.
14 ;;; Upon read, DECODE determines the data type of the object by looking at
15 ;;; the tag character.
16 ;;;
17 ;;; If the data type is known upfron, the respective coder function can be
18 ;;; called directly.  ENCODE-INTEGER encodes an integer, for example.
19 ;;;
20 ;;; At certain file positions, only one datatype makes sense (i.e. when
21 ;;; writing a structure with a fixed layout).  In this case, the tag need
22 ;;; not be written.  For this purpose, a low-level function, i.e.
23 ;;; %ENCODE-INTEGER, exists to write the object without writing a tag,
24 ;;; and a matching decode function DECODE-INTEGER to read such untagged
25 ;;; data.
26
27 ;;; Format:
28
29 ;;;    Field    Format     Comment
30 ;;; ----------------------------------------------------------------
31 ;;; Integer
32 ;;;     tag     #\i
33 ;;;     n       byte       Number of bytes that follow
34 ;;;     data    byte[n]    The actual data, a big endian number
35 ;;;
36 ;;; ----------------------------------------------------------------
37 ;;; Rational
38 ;;;     tag     #\r
39 ;;;     n       byte       Number of bytes that follow
40 ;;;     data    byte[n]    The numerator, a big endian number
41 ;;;     n       byte       Number of bytes that follow
42 ;;;     data    byte[n]    The denominator, a big endian number
43 ;;;
44 ;;; ----------------------------------------------------------------
45 ;;; Reference to a STORE-OBJECT
46 ;;;     tag     #\o
47 ;;;     ID      %integer   ID of the referenced object
48 ;;;
49 ;;; ----------------------------------------------------------------
50 ;;; List
51 ;;;     tag     #\l
52 ;;;     n       %integer   Number of bytes that follow
53 ;;;     data    object[n]  Objects including tag
54 ;;;     tail    object     If n != 0: CDR of the last cons
55 ;;;
56 ;;; ----------------------------------------------------------------
57 ;;; Char
58 ;;;     tag     #\c
59 ;;;     data    char       Character, written with WRITE-CHAR
60 ;;; ----------------------------------------------------------------
61 ;;; String
62 ;;;     tag     #\s
63 ;;;     n       %integer   Number of bytes that follow
64 ;;;     data    char[n]    Characters, written with WRITE-CHAR
65 ;;; Note that the layout of strings will change to not use WRITE-CHAR
66 ;;;
67 ;;; ----------------------------------------------------------------
68 ;;; Symbol
69 ;;;     tag     #\y
70 ;;;     package %string    Name of the home package of the symbol
71 ;;;     name    %string    Name of the symbol
72 ;;;
73 ;;; ----------------------------------------------------------------
74 ;;; Hash-Table
75 ;;;     tag     #\#
76 ;;;     test    %symbol    hash-table-test-function
77 ;;;     r.-size %double    hash-table-rehash-size
78 ;;;     n       %integer   Number of value pairs that follow
79 ;;;     data    pair[n]    Value pairs in the following format
80 ;;;
81 ;;;   pair:
82 ;;;     key     object     Objekt with tag
83 ;;;     value   object     Objekt with tag
84 ;;;
85 ;;; ----------------------------------------------------------------
86 ;;; Single-Float
87 ;;;     tag     #\f
88 ;;;     data    byte[4]    IEEE representation (big endian)
89 ;;;
90 ;;; ----------------------------------------------------------------
91 ;;; Double-Float
92 ;;;     tag     #\d
93 ;;;     data    byte[8]    IEEE representation (big endian)
94 ;;;
95 ;;; ----------------------------------------------------------------
96 ;;; Array (saves all standard array attributes except for displacedness)
97 ;;;     tag     #\a
98 ;;;     type    %symbol           ARRAY-ELEMENT-TYPE
99 ;;;     flags   byte
100 ;;;               bit 0: array is a vector
101 ;;;               bit 1: array is adjustable
102 ;;;               bit 2: vector has a fill-pointer
103 ;;;               other bits reserved
104 ;;;
105 ;;;   if (flags has bit 0 set) {
106 ;;;     length  %integer          Length of fector
107 ;;;   } else {
108 ;;;     n       %integer          Number of dimensions following
109 ;;;     dims    %integer[n]       ARRAY-DIMENSIONS
110 ;;;   }
111 ;;;
112 ;;;   if (flags has bit 2 set) {
113 ;;;     fp      %integer          fill-pointer
114 ;;;   }
115 ;;;
116 ;;;     data    object[\Pi dims]  Data in row-major-order
117 ;;;
118 ;;; ----------------------------------------------------------------
119
120 (in-package :bknr.datastore)
121
122 ;;;; workaround
123
124 (declaim (inline %read-char %write-char))
125 (defun %read-char (stream)
126   (code-char (%decode-uint32 stream)))
127
128 (defun %write-char (char stream)
129   (%encode-int32 (char-code char) stream))
130
131 ;;;; tags
132 (declaim (inline %read-tag %write-tag))
133 (defun %read-tag (stream &optional (eof-error-p t) eof-value)
134   (let ((b (read-byte stream eof-error-p -1)))
135     (if (= b -1)
136         eof-value
137         (code-char b))))
138
139 (defun %write-tag (char stream)
140   (write-byte (char-code char) stream))
141
142 ;;;; binary encoding
143
144 (defun %encode-int32 (object stream)
145   (write-byte (ldb (byte 8 24) object) stream)
146   (write-byte (ldb (byte 8 16) object) stream)
147   (write-byte (ldb (byte 8 08) object) stream)
148   (write-byte (ldb (byte 8 00) object) stream))
149
150 (defun %encode-int16 (object stream)
151   (write-byte (ldb (byte 8 08) object) stream)
152   (write-byte (ldb (byte 8 00) object) stream))
153
154 (defun %encode-integer (object stream)
155   (let ((n (ceiling (1+ (integer-length object)) 8)))
156     (write-byte n stream)
157     (loop
158        for i from (- (* n 8) 8) downto 0 by 8
159        do (write-byte (ldb (byte 8 i) object) stream))))
160
161 (defun %encode-rational (object stream)
162   (%encode-integer (numerator object) stream)
163   (%encode-integer (denominator object) stream))
164
165 (defun encode-integer (object stream)
166   (%write-tag #\i stream)
167   (%encode-integer object stream))
168
169 (defun encode-rational (object stream)
170   (%write-tag #\r stream)
171   (%encode-rational object stream))
172
173 (defun count-conses (list)
174   ;; Vorsicht, CMUCL LOOP hat einen Bug mit dotted lists.
175   ;; Daher nicht FOR-ON verwenden.
176   (loop for l = list then (cdr l)
177      while (consp l)
178      count 1))
179
180 (defun %encode-list (object stream)
181   (let ((len (count-conses object)))
182     (%encode-integer len stream)
183     ;; Vorsicht, CMUCL LOOP hat einen Bug mit dotted lists.
184     ;; Daher nicht FOR-ON verwenden.
185     (when (> len 0)
186       (loop for l = object then (cdr l)
187          while (consp l)
188          do (encode (car l) stream)
189          finally (encode l stream)))))
190
191 (defun encode-list (object stream)
192   (%write-tag #\l stream)
193   (%encode-list object stream))
194
195 (defun encode-char (object stream)
196   (%write-tag #\c stream)
197   (%write-char object stream))
198
199 (defun %encode-string (object stream)
200   (let ((octets (trivial-utf-8:string-to-utf-8-bytes object)))
201     (%encode-integer (length octets) stream)
202     (write-sequence octets stream)))
203
204 (defun encode-string (object stream)
205   (%write-tag #\s stream)
206   (%encode-string object stream))
207
208 (defun %encode-symbol (object stream)
209   (%encode-string (package-name (symbol-package object)) stream)
210   (%encode-string (symbol-name object) stream))
211
212 (defun encode-symbol (object stream)
213   (%write-tag #\y stream)
214   (%encode-symbol object stream))
215
216 (defun encode-hash-table (object stream)
217   (%write-tag #\# stream)
218   (%encode-symbol (hash-table-test object) stream)
219   (%encode-double-float (float (hash-table-rehash-size object) 1.0d0) stream)
220   (%encode-integer (hash-table-count object) stream)
221   (maphash (lambda (k v)
222              (encode k stream)
223              (encode v stream))
224            object))
225
226 (defun %encode-single-float (object stream)
227   #+allegro
228   (map nil #'(lambda (short)
229                (%encode-int16 short stream))
230        (multiple-value-list (excl::single-float-to-shorts object)))
231   #+cmu
232   (%encode-int32 (kernel:single-float-bits object) stream)
233   #+openmcl
234   (%encode-int32 (ccl::single-float-bits object) stream)
235   #+sbcl
236   (%encode-int32 (sb-kernel:single-float-bits object) stream))
237
238 (defun encode-single-float (object stream)
239   (%write-tag #\f stream)
240   (%encode-single-float object stream))
241
242 (defun %encode-double-float (object stream)
243   #+cmucl
244   (map nil #'(lambda (short)
245                (%encode-int16 short stream))
246        (multiple-value-list (excl::double-float-to-shorts object)))
247   #+cmu
248   (progn (%encode-int32 (kernel:double-float-high-bits object) stream)
249          (%encode-int32 (kernel:double-float-low-bits object) stream))
250   #+openmcl
251   (multiple-value-bind (hi lo) (ccl::double-float-bits object)
252     (%encode-int32 hi stream)
253     (%encode-int32 lo stream))
254   #+sbcl
255   (progn (%encode-int32 (sb-kernel:double-float-high-bits object) stream)
256          (%encode-int32 (sb-kernel:double-float-low-bits object) stream)))
257
258 (defun encode-double-float (object stream)
259   (%write-tag #\d stream)
260   (%encode-double-float object stream))
261
262 (defun %encode-array (object stream)
263   (%encode-symbol (array-element-type object) stream)
264   (let* ((vectorp (typep object 'vector))
265          (fill-pointer-p (array-has-fill-pointer-p object))
266          (flags (logior (if vectorp 1 0)
267                         (if (adjustable-array-p object) 2 0)
268                         (if fill-pointer-p 4 0)))
269          (dims (array-dimensions object)))
270     (write-byte flags stream)
271     (cond
272       (vectorp
273        (%encode-integer (car dims) stream))
274       (t
275        (%encode-integer (length dims) stream)
276        (dolist (d dims)
277          (%encode-integer d stream))))
278     (when fill-pointer-p
279       (%encode-integer (fill-pointer object) stream))
280     (dotimes (i (reduce #'* dims))
281       (encode (row-major-aref object i) stream))))
282
283 (defun encode-array (object stream)
284   (%write-tag #\a stream)
285   (%encode-array object stream))
286
287 (defun encode (object stream)
288   (typecase object
289     (integer (encode-integer object stream))
290     (rational (encode-rational object stream))
291     (symbol (encode-symbol object stream))
292     (character (encode-char object stream))
293     (string (encode-string object stream))
294     (list (encode-list object stream))
295     (array (encode-array object stream))
296     (hash-table (encode-hash-table object stream))
297     (single-float (encode-single-float object stream))
298     (double-float (encode-double-float object stream))
299     (t (encode-object object stream))))
300
301 (defgeneric encode-object (object stream))
302
303 ;;;; decoding
304
305 (defun %decode-integer/fixed (stream n)
306   (let* ((initial (read-byte stream))
307          (result (if (logbitp 7 initial) -1 0)))
308     (setf result (logior (ash result 8) initial))
309     (dotimes (x (1- n))
310       (setf result (logior (ash result 8) (read-byte stream))))
311     result))
312
313 (defun %decode-uint16 (stream)
314   (logior (ash (read-byte stream) 08)
315           (read-byte stream)))
316
317 (defun %decode-sint32 (stream)
318   (%decode-integer/fixed stream 4))
319
320 (defun %decode-uint32 (stream)
321   (logior (ash (read-byte stream) 24)
322           (ash (read-byte stream) 16)
323           (ash (read-byte stream) 08)
324           (read-byte stream)))
325
326 (defun %decode-integer (stream)
327   (let ((n (read-byte stream)))
328     (assert (plusp n))                  ;n==0 geben wir nicht aus
329     (%decode-integer/fixed stream n)))
330
331 (defun %decode-rational (stream)
332   (/ (%decode-integer stream)
333      (%decode-integer stream)))
334
335 (defun %decode-char (stream)
336   (%read-char stream))
337
338 (defun %decode-string (stream)
339   (labels ((octets-to-string-safe (octets) ; safe and portable
340              (let ((flexi-streams:*substitution-char* #\?))
341                (handler-case
342                    (flexi-streams:octets-to-string octets :external-format :utf-8)
343                  (flexi-streams:external-format-condition (e)
344                    (declare (ignore e))
345                    (let ((string (flexi-streams:octets-to-string octets :external-format :ascii)))
346                      (warn "could not decode string ~S as utf-8, decoded as ASCII" string)
347                      string)))))
348            (octets-to-string (octets)
349              (handler-case
350                  (trivial-utf-8:utf-8-bytes-to-string octets)
351                (trivial-utf-8:utf-8-decoding-error ()
352                  (octets-to-string-safe octets)))))
353     (let* ((n (%decode-integer stream))
354            (buffer (make-array n :element-type '(unsigned-byte 8))))
355       (assert (= n (read-sequence buffer stream)))
356       (octets-to-string buffer))))
357
358 (defun find-symbol-in-all-packages (name)
359   (let (symbols)
360     (do-all-symbols (symbol symbols)
361       (when (string-equal symbol name)
362         (pushnew symbol symbols)))))
363
364 (defun find-symbol-interactively (package-name symbol-name usage)
365   (let ((keyword (string-equal package-name "KEYWORD")))
366     (restart-case
367         (multiple-value-bind (symbol status)
368             (funcall (if keyword
369                          #'intern
370                          #'find-symbol)
371                      symbol-name
372                      (or (find-package package-name)
373                          (error "package ~A for symbol ~A~@[ naming ~A~] not found" package-name symbol-name usage)))
374           (if (or keyword status)
375               symbol
376               (error "symbol ~A~@[ naming ~A~] not found in package ~A" symbol-name usage package-name)))
377       (use-other-symbol (new-symbol)
378         :interactive (lambda ()
379                        (format t "Enter symbol~@[ (homonyms: ~{~S~^, ~})~]: " (find-symbol-in-all-packages symbol-name))
380                        (let ((new-symbol (ignore-errors (read))))
381                          (list new-symbol)))
382         :report (lambda (stream) (format stream "Use another symbol~@[, homonyms: ~S~]" (find-symbol-in-all-packages symbol-name)))
383         new-symbol)
384       (read-as-nil ()
385         :report "Read symbol as NIL"
386         nil))))
387
388 (defun %decode-symbol (stream &key (intern t) usage)
389   (let ((package-name (%decode-string stream))
390         (symbol-name (%decode-string stream)))
391     (when intern
392       (find-symbol-interactively package-name symbol-name usage))))
393
394 (defun %decode-list (stream)
395   (let* ((n (%decode-integer stream))
396          (result (loop repeat n collect (decode stream)))
397          (tail (and (plusp n) (decode stream))))
398     (when tail
399       (setf (cdr (last result)) tail))
400     result))
401
402 (defun %decode-hash-table (stream)
403   (let* ((test (%decode-symbol stream :usage "hash table test"))
404          (rehash-size (%decode-double-float stream))
405          (n (%decode-integer stream))
406          (result (make-hash-table :test test :size n :rehash-size rehash-size)))
407     (dotimes (x n)
408       (let ((key (decode stream))
409             (value (decode stream)))
410         (setf (gethash key result) value)))
411     result))
412
413 (defun %decode-single-float (stream)
414   #+allegro
415   (excl::shorts-to-single-float (%decode-uint16 stream)
416                                 (%decode-uint16 stream))
417   #+cmu
418   (kernel:make-single-float (%decode-sint32 stream))
419   #+openmcl
420   (make-single-float (%decode-sint32 stream))
421   #+sbcl
422   (sb-kernel:make-single-float (%decode-sint32 stream)))
423
424 (defun %decode-double-float (stream)
425   #+allegro
426   (excl::shorts-to-double-float (%decode-uint16 stream)
427                                 (%decode-uint16 stream)
428                                 (%decode-uint16 stream)
429                                 (%decode-uint16 stream))
430   #+cmu
431   (kernel:make-double-float (%decode-sint32 stream)
432                             (%decode-uint32 stream))
433   #+openmcl
434   (make-double-float (%decode-sint32 stream)
435                      (%decode-uint32 stream))
436   #+sbcl
437   (sb-kernel:make-double-float (%decode-sint32 stream)
438                                (%decode-uint32 stream)))
439
440 (defun %decode-array (stream)
441   (let* ((element-type (%decode-symbol stream :usage "array element type"))
442          (flags (read-byte stream))
443          (vectorp (logbitp 0 flags))
444          (adjustablep (logbitp 1 flags))
445          (fill-pointer-p (logbitp 2 flags))
446          (dimensions
447           (if vectorp
448               (list (%decode-integer stream))
449               (loop repeat (%decode-integer stream)
450                  collect (%decode-integer stream))))
451          (fill-pointer
452           (if fill-pointer-p
453               (%decode-integer stream)
454               nil))
455          (result (make-array dimensions
456                              :element-type element-type
457                              :adjustable adjustablep
458                              :fill-pointer fill-pointer)))
459     (dotimes (i (reduce #'* dimensions))
460       (setf (row-major-aref result i) (decode stream)))
461     result))
462
463 (defun decode (stream)
464   (let ((tag (%read-tag stream)))
465     (case tag
466       (#\a (%decode-array stream))
467       (#\i (%decode-integer stream))
468       (#\y (%decode-symbol stream))
469       (#\c (%decode-char stream))
470       (#\s (%decode-string stream))
471       (#\l (%decode-list stream))
472       (#\# (%decode-hash-table stream))
473       (#\f (%decode-single-float stream))
474       (#\d (%decode-double-float stream))
475       (#\r (%decode-rational stream))
476       (t (decode-object tag stream)))))
477
478 (defgeneric decode-object (tag stream))
479
480 ;;;; OpenMCL does not have these functions
481 (defun make-single-float (bits)
482   (cond
483     ;; IEEE float special cases
484     ((zerop bits) 0.0)
485     ((= bits #x-80000000) -0.0)
486     (t (let* ((sign (ecase (ldb (byte 1 31) bits)
487                       (0  1.0)
488                       (1 -1.0)))
489               (iexpt (ldb (byte 8 23) bits))
490               (expt (if (zerop iexpt)   ; denormalized
491                         -126
492                         (- iexpt 127)))
493               (mant (* (logior (ldb (byte 23 0) bits)
494                                (if (zerop iexpt)
495                                    0
496                                    (ash 1 23)))
497                        (expt 0.5 23))))
498          (* sign (expt 2.0 expt) mant)))))
499
500 #+openmcl
501 (defun make-double-float (hi lo)
502   (cond
503     ;; IEEE float special cases
504     ((and (zerop hi) (zerop lo)) 0.0d0)
505     ((and (= hi #x-80000000) (zerop lo)) -0.0d0)
506     (t (let* ((bits (logior (ash hi 32) lo))
507               (sign (ecase (ldb (byte 1 63) bits)
508                       (0  1.0d0)
509                       (1 -1.0d0)))
510               (iexpt (ldb (byte 11 52) bits))
511               (expt (if (zerop iexpt)   ; denormalized
512                         -1022
513                         (- iexpt 1023)))
514               (mant (* (logior (ldb (byte 52 0) bits)
515                                (if (zerop iexpt)
516                                    0
517                                    (ash 1 52)))
518                        (expt 0.5d0 52))))
519          (* sign (expt 2.0d0 expt) mant)))))
Note: See TracBrowser for help on using the browser.