mirror of
https://github.com/google/brotli.git
synced 2024-11-25 13:00:06 +00:00
Research (#491)
* add advanced mode for optimal references generator * fix #489 Thanks to Ivan Nikulin for working on it.
This commit is contained in:
parent
fe9f9a9182
commit
27d94590a2
@ -84,16 +84,17 @@ int main(int argc, char* argv[]) {
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
FILE* fimage1 = fopen(argv[1], "rb");
|
|
||||||
FILE* fimage2 = fopen(argv[2], "rb");
|
|
||||||
FILE* fdiff = fopen(argv[3], "wb");
|
|
||||||
|
|
||||||
uint8_t **image1, **image2;
|
uint8_t **image1, **image2;
|
||||||
size_t h1, w1, h2, w2;
|
size_t h1, w1, h2, w2;
|
||||||
|
|
||||||
|
FILE* fimage1 = fopen(argv[1], "rb");
|
||||||
ReadPGM(fimage1, &image1, &h1, &w1);
|
ReadPGM(fimage1, &image1, &h1, &w1);
|
||||||
ReadPGM(fimage2, &image2, &h2, &w2);
|
|
||||||
fclose(fimage1);
|
fclose(fimage1);
|
||||||
|
|
||||||
|
FILE* fimage2 = fopen(argv[2], "rb");
|
||||||
|
ReadPGM(fimage2, &image2, &h2, &w2);
|
||||||
fclose(fimage2);
|
fclose(fimage2);
|
||||||
|
|
||||||
if (!(h1 == h2 && w1 == w2)) {
|
if (!(h1 == h2 && w1 == w2)) {
|
||||||
printf("Images must have the same size.\n");
|
printf("Images must have the same size.\n");
|
||||||
return 1;
|
return 1;
|
||||||
@ -103,7 +104,9 @@ int main(int argc, char* argv[]) {
|
|||||||
for (size_t i = 0; i < h1; ++i) diff[i] = new int[w1];
|
for (size_t i = 0; i < h1; ++i) diff[i] = new int[w1];
|
||||||
CalculateDiff(diff, image1, image2, h1, w1);
|
CalculateDiff(diff, image1, image2, h1, w1);
|
||||||
|
|
||||||
|
FILE* fdiff = fopen(argv[3], "wb");
|
||||||
DrawDiff(diff, image1, image2, h1, w1, fdiff);
|
DrawDiff(diff, image1, image2, h1, w1, fdiff);
|
||||||
fclose(fdiff);
|
fclose(fdiff);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -12,13 +12,23 @@
|
|||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
#include <functional>
|
||||||
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
#include <gflags/gflags.h>
|
#include <gflags/gflags.h>
|
||||||
using gflags::ParseCommandLineFlags;
|
using gflags::ParseCommandLineFlags;
|
||||||
|
|
||||||
#include "./esaxx/sais.hxx"
|
#include "./esaxx/sais.hxx"
|
||||||
|
|
||||||
|
DEFINE_bool(advanced, false, "Advanced searching mode: finds all longest "
|
||||||
|
"matches at positions that are not covered by matches of length at least "
|
||||||
|
"max_length. WARNING: uses much more memory than simple mode, especially "
|
||||||
|
"for small values of min_length.");
|
||||||
DEFINE_int32(min_length, 1, "Minimal length of found backward references.");
|
DEFINE_int32(min_length, 1, "Minimal length of found backward references.");
|
||||||
|
/* For advanced mode. */
|
||||||
|
DEFINE_int32(long_length, 32,
|
||||||
|
"Maximal length of found backward references for advanced mode.");
|
||||||
DEFINE_int32(skip, 1, "Number of bytes to skip.");
|
DEFINE_int32(skip, 1, "Number of bytes to skip.");
|
||||||
|
|
||||||
const size_t kFileBufferSize = (1 << 16); // 64KB
|
const size_t kFileBufferSize = (1 << 16); // 64KB
|
||||||
@ -26,6 +36,9 @@ const size_t kFileBufferSize = (1 << 16); // 64KB
|
|||||||
typedef int sarray_type; // Can't make it unsigned because of templates :(
|
typedef int sarray_type; // Can't make it unsigned because of templates :(
|
||||||
typedef uint8_t input_type;
|
typedef uint8_t input_type;
|
||||||
typedef uint32_t lcp_type;
|
typedef uint32_t lcp_type;
|
||||||
|
typedef std::pair<int, std::vector<int> > entry_type;
|
||||||
|
typedef std::function<void(sarray_type*, lcp_type*, size_t, int, int, int, int,
|
||||||
|
int)> Fn;
|
||||||
|
|
||||||
void ReadInput(FILE* fin, input_type* storage, size_t input_size) {
|
void ReadInput(FILE* fin, input_type* storage, size_t input_size) {
|
||||||
size_t last_pos = 0;
|
size_t last_pos = 0;
|
||||||
@ -59,12 +72,65 @@ void BuildLCP(input_type* storage, sarray_type* sarray, lcp_type* lcp,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ProcessReferences(input_type* storage, sarray_type* sarray, lcp_type* lcp,
|
inline void PrintReference(sarray_type* sarray, lcp_type* lcp, size_t size,
|
||||||
size_t size, uint32_t* pos, FILE* fout) {
|
int idx, int left_ix, int right_ix, int left_lcp,
|
||||||
|
int right_lcp, FILE* fout) {
|
||||||
|
int max_lcp_ix;
|
||||||
|
if (right_ix == size - 1 || (left_ix >= 0 && left_lcp >= right_lcp)) {
|
||||||
|
max_lcp_ix = left_ix;
|
||||||
|
} else {
|
||||||
|
max_lcp_ix = right_ix;
|
||||||
|
}
|
||||||
|
int dist = idx - sarray[max_lcp_ix];
|
||||||
|
assert(dist > 0);
|
||||||
|
fputc(1, fout);
|
||||||
|
fwrite(&idx, sizeof(int), 1, fout); // Position in input.
|
||||||
|
fwrite(&dist, sizeof(int), 1, fout); // Backward distance.
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void GoLeft(sarray_type* sarray, lcp_type* lcp, int idx, int left_ix,
|
||||||
|
int left_lcp, entry_type* entry) {
|
||||||
|
entry->first = left_lcp;
|
||||||
|
if (left_lcp > FLAGS_long_length) return;
|
||||||
|
for (; left_ix >= 0; --left_ix) {
|
||||||
|
if (lcp[left_ix] < left_lcp) break;
|
||||||
|
if (sarray[left_ix] < idx) {
|
||||||
|
entry->second.push_back(idx - sarray[left_ix]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void GoRight(sarray_type* sarray, lcp_type* lcp, int idx, size_t size,
|
||||||
|
int right_ix, int right_lcp, entry_type* entry) {
|
||||||
|
entry->first = right_lcp;
|
||||||
|
if (right_lcp > FLAGS_long_length) return;
|
||||||
|
for (; right_ix < size - 1; ++right_ix) {
|
||||||
|
if (lcp[right_ix] < right_lcp) break;
|
||||||
|
if (sarray[right_ix] < idx) {
|
||||||
|
entry->second.push_back(idx - sarray[right_ix]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void StoreReference(sarray_type* sarray, lcp_type* lcp, size_t size,
|
||||||
|
int idx, int left_ix, int right_ix, int left_lcp,
|
||||||
|
int right_lcp, entry_type* entries) {
|
||||||
|
if (right_ix == size - 1 || (left_ix >= 0 && left_lcp > right_lcp)) {
|
||||||
|
// right is invalid or left is better
|
||||||
|
GoLeft(sarray, lcp, idx, left_ix, left_lcp, &entries[idx]);
|
||||||
|
} else if (left_ix < 0 || (right_ix < size - 1 && right_lcp > left_lcp)) {
|
||||||
|
// left is invalid or right is better
|
||||||
|
GoRight(sarray, lcp, idx, size, right_ix, right_lcp, &entries[idx]);
|
||||||
|
} else { // both are valid and of equal length
|
||||||
|
GoLeft(sarray, lcp, idx, left_ix, left_lcp, &entries[idx]);
|
||||||
|
GoRight(sarray, lcp, idx, size, right_ix, right_lcp, &entries[idx]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void ProcessReferences(sarray_type* sarray, lcp_type* lcp, size_t size,
|
||||||
|
uint32_t* pos, const Fn& process_output) {
|
||||||
int min_length = FLAGS_min_length;
|
int min_length = FLAGS_min_length;
|
||||||
for (int idx = FLAGS_skip; idx < size; ++idx) {
|
for (int idx = FLAGS_skip; idx < size; ++idx) {
|
||||||
int max_lcp = -1;
|
|
||||||
int max_lcp_ix;
|
|
||||||
int left_lcp = -1;
|
int left_lcp = -1;
|
||||||
int left_ix;
|
int left_ix;
|
||||||
for (left_ix = pos[idx] - 1; left_ix >= 0; --left_ix) {
|
for (left_ix = pos[idx] - 1; left_ix >= 0; --left_ix) {
|
||||||
@ -74,10 +140,6 @@ void ProcessReferences(input_type* storage, sarray_type* sarray, lcp_type* lcp,
|
|||||||
if (left_lcp == 0) break;
|
if (left_lcp == 0) break;
|
||||||
if (sarray[left_ix] < idx) break;
|
if (sarray[left_ix] < idx) break;
|
||||||
}
|
}
|
||||||
if (left_ix >= 0) {
|
|
||||||
max_lcp = left_lcp;
|
|
||||||
max_lcp_ix = left_ix;
|
|
||||||
}
|
|
||||||
|
|
||||||
int right_lcp = -1;
|
int right_lcp = -1;
|
||||||
int right_ix;
|
int right_ix;
|
||||||
@ -86,30 +148,48 @@ void ProcessReferences(input_type* storage, sarray_type* sarray, lcp_type* lcp,
|
|||||||
right_lcp = lcp[right_ix];
|
right_lcp = lcp[right_ix];
|
||||||
}
|
}
|
||||||
// Stop if we have better result from the left side already.
|
// Stop if we have better result from the left side already.
|
||||||
if (right_lcp < max_lcp) break;
|
if (right_lcp < left_lcp && left_ix >= 0) break;
|
||||||
if (right_lcp == 0) break;
|
if (right_lcp == 0) break;
|
||||||
if (sarray[right_ix] < idx) break;
|
if (sarray[right_ix] < idx) break;
|
||||||
}
|
}
|
||||||
if (right_lcp > max_lcp && right_ix < size - 1) {
|
|
||||||
max_lcp = right_lcp;
|
|
||||||
max_lcp_ix = right_ix;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (max_lcp >= min_length) {
|
if ((left_ix >= 0 && left_lcp >= min_length) ||
|
||||||
int dist = idx - sarray[max_lcp_ix];
|
(right_ix < size - 1 && right_lcp >= min_length)) {
|
||||||
if (dist <= 0) {
|
process_output(sarray, lcp, size, idx, left_ix, right_ix, left_lcp,
|
||||||
printf("idx = %d, pos[idx] = %u\n", idx, pos[idx]);
|
right_lcp);
|
||||||
printf("left_ix = %d, right_ix = %d\n",
|
}
|
||||||
left_ix, right_ix);
|
}
|
||||||
printf("left_lcp = %d, right_lcp = %d\n",
|
}
|
||||||
left_lcp, right_lcp);
|
|
||||||
printf("sarray[left_ix] = %d, sarray[right_ix] = %d\n",
|
void ProcessEntries(entry_type* entries, size_t size, FILE* fout) {
|
||||||
sarray[left_ix], sarray[right_ix]);
|
int long_length = FLAGS_long_length;
|
||||||
assert(dist > 0);
|
std::vector<std::pair<int, int> > segments;
|
||||||
|
size_t idx;
|
||||||
|
for (idx = 0; idx < size;) {
|
||||||
|
entry_type& entry = entries[idx];
|
||||||
|
if (entry.first > long_length) {
|
||||||
|
// Add segment.
|
||||||
|
if (segments.empty() || segments.back().second < idx) {
|
||||||
|
segments.push_back({idx, idx + entry.first});
|
||||||
|
} else {
|
||||||
|
segments.back().second = idx + entry.first;
|
||||||
}
|
}
|
||||||
fputc(1, fout);
|
}
|
||||||
fwrite(&idx, sizeof(int), 1, fout); // Position in input.
|
++idx;
|
||||||
fwrite(&dist, sizeof(int), 1, fout); // Backward distance.
|
}
|
||||||
|
printf("Segments generated.\n");
|
||||||
|
size_t segments_ix = 0;
|
||||||
|
for (idx = 0; idx < size;) {
|
||||||
|
if (idx == segments[segments_ix].first) {
|
||||||
|
// Skip segment.
|
||||||
|
idx = segments[segments_ix].second;
|
||||||
|
} else {
|
||||||
|
for (auto& dist : entries[idx].second) {
|
||||||
|
fputc(1, fout);
|
||||||
|
fwrite(&idx, sizeof(int), 1, fout); // Position in input.
|
||||||
|
fwrite(&dist, sizeof(int), 1, fout); // Backward distance.
|
||||||
|
}
|
||||||
|
++idx;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -144,8 +224,44 @@ int main(int argc, char* argv[]) {
|
|||||||
lcp_type* lcp = new lcp_type[input_size];
|
lcp_type* lcp = new lcp_type[input_size];
|
||||||
BuildLCP(storage, sarray, lcp, input_size, pos);
|
BuildLCP(storage, sarray, lcp, input_size, pos);
|
||||||
printf("LCP array constructed.\n");
|
printf("LCP array constructed.\n");
|
||||||
|
delete[] storage;
|
||||||
|
|
||||||
|
using std::placeholders::_1;
|
||||||
|
using std::placeholders::_2;
|
||||||
|
using std::placeholders::_3;
|
||||||
|
using std::placeholders::_4;
|
||||||
|
using std::placeholders::_5;
|
||||||
|
using std::placeholders::_6;
|
||||||
|
using std::placeholders::_7;
|
||||||
|
using std::placeholders::_8;
|
||||||
|
entry_type* entries;
|
||||||
|
if (FLAGS_advanced) {
|
||||||
|
entries = new entry_type[input_size];
|
||||||
|
for (size_t i = 0; i < input_size; ++i) entries[i].first = -1;
|
||||||
|
}
|
||||||
|
Fn print = std::bind(PrintReference, _1, _2, _3, _4, _5, _6, _7, _8, fout);
|
||||||
|
Fn store = std::bind(StoreReference, _1, _2, _3, _4, _5, _6, _7, _8, entries);
|
||||||
|
|
||||||
|
ProcessReferences(sarray, lcp, input_size, pos,
|
||||||
|
FLAGS_advanced ? store : print);
|
||||||
|
printf("References processed.\n");
|
||||||
|
|
||||||
|
if (FLAGS_advanced) {
|
||||||
|
int good_cnt = 0;
|
||||||
|
uint64_t avg_cnt = 0;
|
||||||
|
for (size_t i = 0; i < input_size; ++i) {
|
||||||
|
if (entries[i].first != -1) {
|
||||||
|
++good_cnt;
|
||||||
|
avg_cnt += entries[i].second.size();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
printf("Number of covered positions = %d\n", good_cnt);
|
||||||
|
printf("Average number of references per covered position = %.4lf\n",
|
||||||
|
static_cast<double>(avg_cnt) / good_cnt);
|
||||||
|
ProcessEntries(entries, input_size, fout);
|
||||||
|
printf("Entries processed.\n");
|
||||||
|
}
|
||||||
|
|
||||||
ProcessReferences(storage, sarray, lcp, input_size, pos, fout);
|
|
||||||
fclose(fout);
|
fclose(fout);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user