mirror of
https://github.com/google/brotli.git
synced 2024-11-21 19:20:09 +00:00
Address the DISCUSS ballot position from the IESG review of the spec.
This commit is contained in:
parent
136d39bd70
commit
3a9032ba87
@ -7,9 +7,9 @@
|
||||
.ds LF Alakuijala & Szabadka
|
||||
.ds RF FORMFEED[Page %]
|
||||
.ds LH Internet-Draft
|
||||
.ds RH April 2016
|
||||
.ds RH May 2016
|
||||
.ds CH Brotli
|
||||
.ds CF Expires October 19, 2016
|
||||
.ds CF Expires November 4, 2016
|
||||
.hy 0
|
||||
.nh
|
||||
.ad l
|
||||
@ -18,13 +18,13 @@
|
||||
.tl 'Network Working Group''J. Alakuijala'
|
||||
.tl 'Internet-Draft''Z. Szabadka'
|
||||
.tl 'Intended Status: Informational''Google, Inc'
|
||||
.tl 'Expires: October 19, 2016''April 2016'
|
||||
.tl 'Expires: November 4, 2016''May 2016'
|
||||
.fi
|
||||
|
||||
|
||||
.ce 2
|
||||
Brotli Compressed Data Format
|
||||
draft-alakuijala-brotli-09
|
||||
draft-alakuijala-brotli-10
|
||||
.fi
|
||||
.in 3
|
||||
|
||||
@ -52,7 +52,7 @@ and may be updated, replaced, or obsoleted by other documents at any
|
||||
time. It is inappropriate to use Internet-Drafts as reference
|
||||
material or to cite them other than as "work in progress."
|
||||
|
||||
This Internet-Draft will expire on October 19, 2016.
|
||||
This Internet-Draft will expire on November 4, 2016.
|
||||
|
||||
.ti 0
|
||||
Copyright Notice
|
||||
@ -1188,7 +1188,7 @@ using the following lookup tables Lut0, Lut1, and Lut2.
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7
|
||||
.fi
|
||||
|
||||
The lengths and the zlib CRC-32 (ITU-T Recommendation V.42) check values
|
||||
The lengths and the CRC-32 check values (see Appendix C)
|
||||
of each of these tables as a sequence of bytes are as follows:
|
||||
|
||||
.nf
|
||||
@ -1395,7 +1395,7 @@ where the _i subscript denotes the transform_id above. Each T_i
|
||||
is one of the following 21 elementary transforms:
|
||||
|
||||
.nf
|
||||
Identity, UppercaseFirst, UppercaseAll,
|
||||
Identity, FermentFirst, FermentAll,
|
||||
OmitFirst1, ..., OmitFirst9, OmitLast1, ..., OmitLast9
|
||||
.fi
|
||||
|
||||
@ -1404,9 +1404,9 @@ The form of these elementary transforms is as follows:
|
||||
.nf
|
||||
Identity(word) = word
|
||||
|
||||
UppercaseFirst(word) = first UTF-8 character of word upper-cased
|
||||
FermentFirst(word) = see below
|
||||
|
||||
UppercaseAll(word) = all UTF-8 characters of word upper-cased
|
||||
FermentAll(word) = see below
|
||||
|
||||
OmitFirstk(word) = the last (length(word) - k) bytes of word, or
|
||||
empty string if length(word) < k
|
||||
@ -1415,31 +1415,48 @@ The form of these elementary transforms is as follows:
|
||||
empty string if length(word) < k
|
||||
.fi
|
||||
|
||||
For the purposes of UppercaseAll, word is parsed into UTF-8
|
||||
characters and converted to upper-case by taking 1..3 bytes at a time,
|
||||
using the algorithm below:
|
||||
We define the FermentFirst and FermentAll transforms used in this
|
||||
specification by the following C language functions:
|
||||
|
||||
.nf
|
||||
.KS
|
||||
i = 0
|
||||
while i < length(word):
|
||||
if word[i] < 192:
|
||||
if word[i] >= 97 and word[i] <= 122:
|
||||
word[i] = word[i] ^ 32
|
||||
i = i + 1
|
||||
else if word[i] < 224:
|
||||
if i + 1 < length(word):
|
||||
word[i + 1] = word[i + 1] ^ 32
|
||||
i = i + 2
|
||||
else:
|
||||
if i + 2 < length(word):
|
||||
word[i + 2] = word[i + 2] ^ 5
|
||||
i = i + 3
|
||||
/*
 * Applies one elementary "ferment" step at word[pos] and returns the number
 * of bytes the step accounts for (1, 2 or 3), selected by the value of
 * word[pos]:
 *
 *   word[pos] < 192        : if the byte is in 97..122 it is XORed with 32;
 *                            returns 1.
 *   192 <= word[pos] < 224 : word[pos + 1] is XORed with 32 when that index
 *                            is inside the word; returns 2.
 *   word[pos] >= 224       : word[pos + 2] is XORed with 5 when that index
 *                            is inside the word; returns 3.
 *
 * word[pos] is read unconditionally, so the caller must guarantee
 * 0 <= pos < word_len. The return value does not depend on whether the
 * modified byte was in bounds, so it may exceed the bytes remaining.
 */
int Ferment(uint8_t* word, int word_len, int pos) {
  if (word[pos] < 192) {
    /* "&&" rather than "and": the latter is only valid C via <iso646.h>. */
    if (word[pos] >= 97 && word[pos] <= 122) {
      word[pos] = word[pos] ^ 32;
    }
    return 1;
  } else if (word[pos] < 224) {
    if (pos + 1 < word_len) {
      word[pos + 1] = word[pos + 1] ^ 32;
    }
    return 2;
  } else {
    if (pos + 2 < word_len) {
      word[pos + 2] = word[pos + 2] ^ 5;
    }
    return 3;
  }
}
|
||||
.KE
|
||||
.fi
|
||||
|
||||
For UppercaseFirst, the same algorithm is used, but the loop is
|
||||
executed only once.
|
||||
.nf
|
||||
.KS
|
||||
/*
 * FermentFirst transform: runs a single Ferment step at position 0 of word.
 * A word whose length is zero (or negative) is left untouched.
 */
void FermentFirst(uint8_t* word, int word_len) {
  if (word_len <= 0) {
    return;
  }
  /* The step size reported by Ferment is not needed for a single step. */
  (void)Ferment(word, word_len, 0);
}
|
||||
|
||||
/*
 * FermentAll transform: runs Ferment repeatedly from position 0, advancing
 * by the step size each call returns, until the position reaches or passes
 * word_len.
 */
void FermentAll(uint8_t* word, int word_len) {
  int pos;
  for (pos = 0; pos < word_len;) {
    const int step = Ferment(word, word_len, pos);
    pos += step;
  }
}
|
||||
.KE
|
||||
.fi
|
||||
|
||||
Appendix B contains the list of transformations by specifying the
|
||||
prefix, elementary transform and suffix components of each of them.
|
||||
@ -1929,7 +1946,7 @@ of uncompressed bytes.
|
||||
As with any compressed file format, decompressor implementations should
|
||||
handle all compressed data byte sequences, not only those that conform to this
|
||||
specification, where non-conformant compressed data sequences should be
|
||||
discarded.
|
||||
rejected as invalid.
|
||||
|
||||
A possible attack against a system containing a decompressor
|
||||
implementation (e.g. a web browser) is to exploit a buffer overflow
|
||||
@ -2060,7 +2077,7 @@ specification.
|
||||
Appendix A. Static dictionary data
|
||||
|
||||
The hexadecimal form of the DICT array is the following, where the
|
||||
length is 122,784 bytes and the zlib CRC-32 of the byte sequence is
|
||||
length is 122,784 bytes and the CRC-32 of the byte sequence is
|
||||
0x5136cb04.
|
||||
|
||||
.in 0
|
||||
@ -5921,10 +5938,10 @@ In order to generate a length and check value, the transforms can be converted
|
||||
to a series of bytes, where each transform is the prefix sequence of bytes plus
|
||||
a terminating zero byte, a single byte value identifying the transform, and the
|
||||
suffix sequence of bytes plus a terminating zero. The values for the transforms
|
||||
are 0 for Identity, 1 for UppercaseFirst, 2 for UppercaseAll, 3 to 11 for
|
||||
are 0 for Identity, 1 for FermentFirst, 2 for FermentAll, 3 to 11 for
|
||||
OmitFirst1 to OmitFirst9, and 12 to 20 for OmitLast1 to OmitLast9. The byte
|
||||
sequences that represent the 121 transforms are then concatenated to a single
|
||||
sequence of bytes. The length of that sequence is 648 bytes, and the zlib CRC
|
||||
sequence of bytes. The length of that sequence is 648 bytes, and the CRC-32
|
||||
is 0x3d965f81.
|
||||
|
||||
.nf
|
||||
@ -5934,18 +5951,18 @@ is 0x3d965f81.
|
||||
1 "" Identity " "
|
||||
2 " " Identity " "
|
||||
3 "" OmitFirst1 ""
|
||||
4 "" UppercaseFirst " "
|
||||
4 "" FermentFirst " "
|
||||
5 "" Identity " the "
|
||||
6 " " Identity ""
|
||||
7 "s " Identity " "
|
||||
8 "" Identity " of "
|
||||
9 "" UppercaseFirst ""
|
||||
9 "" FermentFirst ""
|
||||
10 "" Identity " and "
|
||||
11 "" OmitFirst2 ""
|
||||
12 "" OmitLast1 ""
|
||||
13 ", " Identity " "
|
||||
14 "" Identity ", "
|
||||
15 " " UppercaseFirst " "
|
||||
15 " " FermentFirst " "
|
||||
16 "" Identity " in "
|
||||
17 "" Identity " to "
|
||||
18 "e " Identity " "
|
||||
@ -5960,7 +5977,7 @@ is 0x3d965f81.
|
||||
27 "" OmitLast2 ""
|
||||
28 "" Identity " a "
|
||||
29 "" Identity " that "
|
||||
30 " " UppercaseFirst ""
|
||||
30 " " FermentFirst ""
|
||||
31 "" Identity ". "
|
||||
32 "." Identity ""
|
||||
33 " " Identity ", "
|
||||
@ -5974,7 +5991,7 @@ is 0x3d965f81.
|
||||
41 " the " Identity ""
|
||||
42 "" OmitLast4 ""
|
||||
43 "" Identity ". The "
|
||||
44 "" UppercaseAll ""
|
||||
44 "" FermentAll ""
|
||||
45 "" Identity " on "
|
||||
46 "" Identity " as "
|
||||
47 "" Identity " is "
|
||||
@ -5988,69 +6005,90 @@ is 0x3d965f81.
|
||||
55 "" OmitFirst7 ""
|
||||
56 "" OmitLast6 ""
|
||||
57 "" Identity "("
|
||||
58 "" UppercaseFirst ", "
|
||||
58 "" FermentFirst ", "
|
||||
59 "" OmitLast8 ""
|
||||
60 "" Identity " at "
|
||||
61 "" Identity "ly "
|
||||
62 " the " Identity " of "
|
||||
63 "" OmitLast5 ""
|
||||
64 "" OmitLast9 ""
|
||||
65 " " UppercaseFirst ", "
|
||||
66 "" UppercaseFirst "\\""
|
||||
65 " " FermentFirst ", "
|
||||
66 "" FermentFirst "\\""
|
||||
67 "." Identity "("
|
||||
68 "" UppercaseAll " "
|
||||
69 "" UppercaseFirst "\\">"
|
||||
68 "" FermentAll " "
|
||||
69 "" FermentFirst "\\">"
|
||||
70 "" Identity "=\\""
|
||||
71 " " Identity "."
|
||||
72 ".com/" Identity ""
|
||||
73 " the " Identity " of the "
|
||||
74 "" UppercaseFirst "'"
|
||||
74 "" FermentFirst "'"
|
||||
75 "" Identity ". This "
|
||||
76 "" Identity ","
|
||||
77 "." Identity " "
|
||||
78 "" UppercaseFirst "("
|
||||
79 "" UppercaseFirst "."
|
||||
78 "" FermentFirst "("
|
||||
79 "" FermentFirst "."
|
||||
80 "" Identity " not "
|
||||
81 " " Identity "=\\""
|
||||
82 "" Identity "er "
|
||||
83 " " UppercaseAll " "
|
||||
83 " " FermentAll " "
|
||||
84 "" Identity "al "
|
||||
85 " " UppercaseAll ""
|
||||
85 " " FermentAll ""
|
||||
86 "" Identity "='"
|
||||
87 "" UppercaseAll "\\""
|
||||
88 "" UppercaseFirst ". "
|
||||
87 "" FermentAll "\\""
|
||||
88 "" FermentFirst ". "
|
||||
89 " " Identity "("
|
||||
90 "" Identity "ful "
|
||||
91 " " UppercaseFirst ". "
|
||||
91 " " FermentFirst ". "
|
||||
92 "" Identity "ive "
|
||||
93 "" Identity "less "
|
||||
94 "" UppercaseAll "'"
|
||||
94 "" FermentAll "'"
|
||||
95 "" Identity "est "
|
||||
96 " " UppercaseFirst "."
|
||||
97 "" UppercaseAll "\\">"
|
||||
96 " " FermentFirst "."
|
||||
97 "" FermentAll "\\">"
|
||||
98 " " Identity "='"
|
||||
99 "" UppercaseFirst ","
|
||||
99 "" FermentFirst ","
|
||||
100 "" Identity "ize "
|
||||
101 "" UppercaseAll "."
|
||||
102 "\\xc2\\xa0" Identity ""
|
||||
101 "" FermentAll "."
|
||||
102 "\\xc2\\xa0" Identity ""
|
||||
103 " " Identity ","
|
||||
104 "" UppercaseFirst "=\\""
|
||||
105 "" UppercaseAll "=\\""
|
||||
104 "" FermentFirst "=\\""
|
||||
105 "" FermentAll "=\\""
|
||||
106 "" Identity "ous "
|
||||
107 "" UppercaseAll ", "
|
||||
108 "" UppercaseFirst "='"
|
||||
109 " " UppercaseFirst ","
|
||||
110 " " UppercaseAll "=\\""
|
||||
111 " " UppercaseAll ", "
|
||||
112 "" UppercaseAll ","
|
||||
113 "" UppercaseAll "("
|
||||
114 "" UppercaseAll ". "
|
||||
115 " " UppercaseAll "."
|
||||
116 "" UppercaseAll "='"
|
||||
117 " " UppercaseAll ". "
|
||||
118 " " UppercaseFirst "=\\""
|
||||
119 " " UppercaseAll "='"
|
||||
120 " " UppercaseFirst "='"
|
||||
107 "" FermentAll ", "
|
||||
108 "" FermentFirst "='"
|
||||
109 " " FermentFirst ","
|
||||
110 " " FermentAll "=\\""
|
||||
111 " " FermentAll ", "
|
||||
112 "" FermentAll ","
|
||||
113 "" FermentAll "("
|
||||
114 "" FermentAll ". "
|
||||
115 " " FermentAll "."
|
||||
116 "" FermentAll "='"
|
||||
117 " " FermentAll ". "
|
||||
118 " " FermentFirst "=\\""
|
||||
119 " " FermentAll "='"
|
||||
120 " " FermentFirst "='"
|
||||
.fi
|
||||
|
||||
.ti 0
|
||||
Appendix C. Computing CRC-32 check values
|
||||
|
||||
For the purpose of this specification, we define the CRC-32 check value of a
|
||||
byte sequence with the following C language function:
|
||||
|
||||
.nf
|
||||
/*
 * Computes the CRC-32 check value of the first len bytes of v.
 *
 * The register starts at 0xffffffff and the result is XORed with 0xffffffff
 * before being returned. Each input byte is folded into the low 8 bits of
 * the register and reduced one bit at a time, low bit first, using the
 * constant 0xedb88320. No lookup table is used.
 *
 * With len <= 0 no byte of v is read and the function returns 0.
 */
uint32_t CRC32(const uint8_t* v, const int len) {
  const uint32_t poly = 0xedb88320UL;
  uint32_t crc = 0xffffffffUL;
  int i;
  for (i = 0; i < len; ++i) {
    uint32_t c = (crc ^ v[i]) & 0xff;
    int bits_left = 8;
    while (bits_left > 0) {
      if (c & 1) {
        c = poly ^ (c >> 1);
      } else {
        c = c >> 1;
      }
      --bits_left;
    }
    crc = c ^ (crc >> 8);
  }
  return crc ^ 0xffffffffUL;
}
|
||||
.fi
|
||||
.in 3
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user