Add distance encoding research tools.

2024-11-21 19:20:09 +00:00 · 2016-09-15 10:44:19 +02:00 · 2016-09-15 10:44:19 +02:00 · 58cecf1783
commit 58cecf1783
parent 5ce9bf11b3
6 changed files with 879 additions and 0 deletions
--- a/research/Makefile
+++ b/research/Makefile
@ -0,0 +1,17 @@
+CC = g++
+CFLAGS += -O2
+CPPFLAGS += -std=c++11
+SOURCES = $(wildcard *.cc)
+EXECUTABLES = $(SOURCES:.cc=)
+BINDIR = bin
+
+all: $(EXECUTABLES)
+
+$(BINDIR):
+	mkdir -p $@
+
+$(EXECUTABLES): $(BINDIR)
+	$(CC) $(CFLAGS) $(CPPFLAGS) $(addsuffix .cc, $@) -o $(BINDIR)/$@
+
+clean:
+	rm -rf $(BINDIR)
--- a/research/draw_diff.cc
+++ b/research/draw_diff.cc
@ -0,0 +1,104 @@
+/* Copyright 2016 Google Inc. All Rights Reserved.
+   Author: vanickulin@google.com (Ivan Nikulin)
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Tool for drawing diff PPM images between two input PGM images. Normally used
+   with backward reference histogram drawing tool. */
+
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <vector>
+
+void ReadPGM(FILE* f, uint8_t*** image, size_t* height, size_t* width) {
+  int colors;
+  assert(fscanf(f, "P5\n%lu %lu\n%d\n", width, height, &colors) == 3);
+  assert(colors == 255);
+  *image = new uint8_t*[*height];
+  for (int i = *height - 1; i >= 0; --i) {
+    (*image)[i] = new uint8_t[*width];
+    assert(fread((*image)[i], 1, *width, f) == *width);
+  }
+}
+
+void CalculateDiff(int** diff, uint8_t** image1, uint8_t** image2,
+                   size_t height, size_t width) {
+  for (size_t i = 0; i < height; ++i) {
+    for (size_t j = 0; j < width; ++j) {
+      diff[i][j] = static_cast<int>(image1[i][j]) - image2[i][j];
+    }
+  }
+}
+
+void DrawDiff(int** diff, uint8_t** image1, uint8_t** image2,
+              size_t height, size_t width, FILE* f) {
+  int max = -1234;
+  int min = +1234;
+  for (size_t i = 0; i < height; ++i) {
+    for (size_t j = 0; j < width; ++j) {
+      if (max < diff[i][j]) max = diff[i][j];
+      if (min > diff[i][j]) min = diff[i][j];
+      int img_min = std::min(255 - image1[i][j], 255 - image2[i][j]);
+      if (max < img_min) max = img_min;
+    }
+  }
+
+  int abs_max = -min;
+  if (abs_max < max) abs_max = max;
+
+  fprintf(f, "P6\n%lu %lu\n%d\n", width, height, abs_max);
+
+  uint8_t* row = new uint8_t[3 * width];
+  for (int i = height - 1; i >= 0; --i) {
+    for (int j = 0; j < width; ++j) {
+      int min_val = std::min(255 - image1[i][j], 255 - image2[i][j]);
+      int max_val = std::max(min_val, abs(diff[i][j]));
+      if (diff[i][j] > 0) { /* red */
+        row[3 * j + 0] = abs_max - max_val + diff[i][j];
+        row[3 * j + 1] = abs_max - max_val;
+        row[3 * j + 2] = abs_max - max_val + min_val;
+      } else { /* green */
+        row[3 * j + 0] = abs_max - max_val;
+        row[3 * j + 1] = abs_max - max_val - diff[i][j];
+        row[3 * j + 2] = abs_max - max_val + min_val;
+      }
+    }
+    fwrite(row, 1, 3 * width, f);
+  }
+  delete[] row;
+}
+
+int main(int argc, char* argv[]) {
+  if (argc != 4) {
+    printf("usage: %s pgm1 pgm2 diff_ppm_path\n", argv[0]);
+    return 1;
+  }
+
+  FILE* fimage1 = fopen(argv[1], "rb");
+  FILE* fimage2 = fopen(argv[2], "rb");
+  FILE* fdiff = fopen(argv[3], "wb");
+
+  uint8_t **image1, **image2;
+  size_t h1, w1, h2, w2;
+  ReadPGM(fimage1, &image1, &h1, &w1);
+  ReadPGM(fimage2, &image2, &h2, &w2);
+  fclose(fimage1);
+  fclose(fimage2);
+  if (!(h1 == h2 && w1 == w2)) {
+    printf("Images must have the same size.\n");
+    return 1;
+  }
+
+  int** diff = new int*[h1];
+  for (size_t i = 0; i < h1; ++i) diff[i] = new int[w1];
+  CalculateDiff(diff, image1, image2, h1, w1);
+
+  DrawDiff(diff, image1, image2, h1, w1, fdiff);
+  fclose(fdiff);
+  return 0;
+}
--- a/research/draw_histogram.cc
+++ b/research/draw_histogram.cc
@ -0,0 +1,201 @@
+/* Copyright 2016 Google Inc. All Rights Reserved.
+   Author: vanickulin@google.com (Ivan Nikulin)
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Backward reference visualization tool. Accepts file with backward references
+   as an input and produces PGM image with histogram of those references. */
+
+#include <algorithm> /* min */
+#include <cassert>
+#include <cstring> /* memset */
+#include <cmath> /* log, round */
+#include <cstdio> /* fscanf, fprintf */
+#include <cstdint>
+
+#include "./read_dist.h"
+
+const int FLAGS_height = 1000;  // Height of the resulting histogam.
+const int FLAGS_width = 1000;  // Width of the resulting histogam.
+int FLAGS_size;  // Size of the compressed file.
+const int FLAGS_brotli_window = 0;  // Size of brotli window in bits.
+const uint64_t FLAGS_min_distance = 0;  // Minimum distance.
+uint64_t FLAGS_max_distance = 0;  // Maximum distance.
+const bool FLAGS_with_copies = false;  // True if input contains copy length.
+const bool FLAGS_simple = false;  // True if using only black and white pixels.
+const bool FLAGS_linear = true;  // True if using linear distance mapping.
+const uint64_t FLAGS_skip = 0;  // Number of bytes to skip.
+
+inline double DistanceTransform(double x) {
+  static bool linear = FLAGS_linear;
+  if (linear) {
+    return x;
+  } else {
+    /* Using log^2 scale because log scale produces big white gap at the bottom
+       of image. */
+    return log(x) * log(x);
+  }
+}
+
+/* Mapping pixel density on arc function to increase contrast. */
+inline double DensityTransform(double x) {
+  double z = 255 - x;
+  return sqrt(255 * 255 - z * z);
+}
+
+inline int GetMaxDistance() {
+  return FLAGS_max_distance;
+}
+
+void AdjustPosition(int* pos) {
+  static uint32_t offset = 0;
+  static int last = 0;
+  static uint32_t window_size = (1 << FLAGS_brotli_window);
+  assert(*pos >= 0 && *pos < window_size);
+  if (*pos < last) {
+    offset += window_size;
+  }
+  last = *pos;
+  *pos += offset;
+}
+
+void CalculateStuff(FILE* fin, int** stuff) {
+  int height = FLAGS_height;
+  int width = FLAGS_width;
+  int skip = FLAGS_skip;
+  size_t min_distance = FLAGS_min_distance;
+
+  printf("height = %d, width = %d\n", height, width);
+
+  for (int i = 0; i < height; i++) {
+    for (int j = 0; j < width; j++) {
+      stuff[i][j] = 0;
+    }
+  }
+
+  int max_pos = FLAGS_size - skip;
+  double min_dist = min_distance > 0 ? DistanceTransform(min_distance) : 0;
+  double max_dist = DistanceTransform(GetMaxDistance()) - min_dist;
+  int copy, pos, distance, x, y;
+  double dist;
+  while (ReadBackwardReference(fin, &copy, &pos, &distance)) {
+    if (pos == -1) continue;  // In case when only insert is present.
+    if (distance < min_distance || distance >= GetMaxDistance()) continue;
+    if (FLAGS_brotli_window != 0) {
+      AdjustPosition(&pos);
+    }
+    if (pos >= skip && distance <= pos) {
+      pos -= skip;
+      if (pos >= max_pos) break;
+      dist = DistanceTransform(static_cast<double>(distance)) - min_dist;
+
+      x = std::min(static_cast<int>(round(dist / max_dist * height)),
+                   height - 1);
+      y = 1ul * pos * width / max_pos;
+      if (!(y >= 0 && y < width)) {
+        printf("pos = %d, max_pos = %d, y = %d\n", pos, max_pos, y);
+        assert(y >= 0 && y < width);
+      }
+
+      if (FLAGS_with_copies) {
+        int right = 1ul * (pos + copy - 1) * width / max_pos;
+        if (right < 0) {
+          printf("pos = %d, distance = %d, copy = %d, y = %d, right = %d\n",
+                  pos, distance, copy, y, right);
+          assert(right >= 0);
+        }
+        if (y == right) {
+          stuff[x][y] += copy;
+        } else {
+          int pos2 = static_cast<int>(ceil(1.0 * (y + 1) * max_pos / width));
+          stuff[x][y] += pos2 - pos;
+          for (int i = y + 1; i < right && i < width; ++i) {
+            stuff[x][i] += max_pos / width;  // Sometimes 1 more, but who cares.
+          }
+          // Make sure the match doesn't go beyond the image.
+          if (right < width) {
+            pos2 = static_cast<int>(ceil(1.0 * right * max_pos / width));
+            stuff[x][right] += pos + copy - 1 - pos2 + 1;
+          }
+        }
+      } else {
+        stuff[x][y]++;
+      }
+    }
+  }
+}
+
+void ConvertToPixels(int** stuff, uint8_t** pixel) {
+  int height = FLAGS_height;
+  int width = FLAGS_width;
+
+  int maxs = 0;
+  for (int i = 0; i < height; i++) {
+    for (int j = 0; j < width; j++) {
+      if (maxs < stuff[i][j]) maxs = stuff[i][j];
+    }
+  }
+
+  bool simple = FLAGS_simple;
+  double max_stuff = static_cast<double>(maxs);
+  for (int i = 0; i < height; i++) {
+    for (int j = 0; j < width; j++) {
+      if (simple) {
+        pixel[i][j] = stuff[i][j] > 0 ? 0 : 255;
+      } else {
+        pixel[i][j] = static_cast<uint8_t>(
+            255 - DensityTransform(stuff[i][j] / max_stuff * 255));
+      }
+    }
+  }
+}
+
+void DrawPixels(uint8_t** pixel, FILE* fout) {
+  int height = FLAGS_height;
+  int width = FLAGS_width;
+
+  fprintf(fout, "P5\n%d %d\n255\n", width, height);
+  for (int i = height - 1; i >= 0; i--) {
+    fwrite(pixel[i], 1, width, fout);
+  }
+}
+
+int main(int argc, char* argv[]) {
+  if (argc != 4) {
+    printf("usage: draw_histogram.cc dist_file input_size output_file\n");
+    return 1;
+  }
+
+  int height = FLAGS_height;
+  int width = FLAGS_width;
+
+  FILE* fin = fopen(argv[1], "r");
+  FILE* fout = fopen(argv[3], "wb");
+
+  FLAGS_size = atoi(argv[2]);
+
+  if (FLAGS_max_distance == 0) FLAGS_max_distance = FLAGS_size;
+
+  /* The bio of stuff:
+     1. Copy length of backward reference.
+     2. Number of backward references per pixel/bucket.
+   */
+  uint8_t** pixel = new uint8_t*[height];
+  int** stuff = new int*[height];
+  for (int i = 0; i < height; i++) {
+    pixel[i] = new uint8_t[width];
+    stuff[i] = new int[width];
+  }
+
+  CalculateStuff(fin, stuff);
+  fclose(fin);
+
+  ConvertToPixels(stuff, pixel);
+
+  DrawPixels(pixel, fout);
+  fclose(fout);
+
+  return 0;
+}
--- a/research/find_opt_references.cc
+++ b/research/find_opt_references.cc
@ -0,0 +1,147 @@
+/* Copyright 2016 Google Inc. All Rights Reserved.
+   Author: vanickulin@google.com (Ivan Nikulin)
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Tool for generating optimal backward references for the input file. Uses
+   sais-lite library for building suffix array. */
+
+#include <algorithm>
+#include <cassert>
+#include <cstdio>
+#include <cstring>
+
+#include "./sais.hxx"
+
+const int FLAGS_min_length = 1;  // Minimal length of found backward references.
+const int FLAGS_skip = 1;  // Number of bytes to skip.
+
+const size_t kFileBufferSize = (1 << 16);  // 64KB
+
+typedef int sarray_type;  // Can't make it unsigned because of templates :(
+typedef uint8_t input_type;
+typedef uint32_t lcp_type;
+
+void ReadInput(FILE* fin, input_type* storage, size_t input_size) {
+  size_t last_pos = 0;
+  size_t available_in;
+  fseek(fin, 0, SEEK_SET);
+  do {
+    available_in = fread(storage + last_pos, 1, kFileBufferSize, fin);
+    last_pos += available_in;
+  } while (available_in != 0);
+  assert(last_pos == input_size);
+}
+
+void BuildLCP(input_type* storage, sarray_type* sarray, lcp_type* lcp,
+              size_t size, uint32_t* pos) {
+  for (int i = 0; i < size; ++i) {
+    pos[sarray[i]] = i;
+  }
+  uint32_t k = 0;
+  lcp[size - 1] = 0;
+  for (int i = 0; i < size; ++i) {
+    if (pos[i] == size - 1) {
+      k = 0;
+      continue;
+    }
+    uint32_t j = sarray[pos[i] + 1];  // Suffix which follow i-th suffix in SA.
+    while (i + k < size && j + k < size && storage[i + k] == storage[j + k]) {
+      ++k;
+    }
+    lcp[pos[i]] = k;
+    if (k > 0) --k;
+  }
+}
+
+void ProcessReferences(input_type* storage, sarray_type* sarray, lcp_type* lcp,
+                       size_t size, uint32_t* pos, FILE* fout) {
+  int min_length = FLAGS_min_length;
+  for (int idx = FLAGS_skip; idx < size; ++idx) {
+    int max_lcp = -1;
+    int max_lcp_ix;
+    int left_lcp = -1;
+    int left_ix;
+    for (left_ix = pos[idx] - 1; left_ix >= 0; --left_ix) {
+      if (left_lcp == -1 || left_lcp > lcp[left_ix]) {
+        left_lcp = lcp[left_ix];
+      }
+      if (left_lcp == 0) break;
+      if (sarray[left_ix] < idx) break;
+    }
+    if (left_ix >= 0) {
+      max_lcp = left_lcp;
+      max_lcp_ix = left_ix;
+    }
+
+    int right_lcp = -1;
+    int right_ix;
+    for (right_ix = pos[idx]; right_ix < size - 1; ++right_ix) {
+      if (right_lcp == -1 || right_lcp > lcp[right_ix]) {
+        right_lcp = lcp[right_ix];
+      }
+      // Stop if we have better result from the left side already.
+      if (right_lcp < max_lcp) break;
+      if (right_lcp == 0) break;
+      if (sarray[right_ix] < idx) break;
+    }
+    if (right_lcp > max_lcp && right_ix < size - 1) {
+      max_lcp = right_lcp;
+      max_lcp_ix = right_ix;
+    }
+
+    if (max_lcp >= min_length) {
+      int dist = idx - sarray[max_lcp_ix];
+      if (dist <= 0) {
+        printf("idx = %d, pos[idx] = %u\n", idx, pos[idx]);
+        printf("left_ix = %d, right_ix = %d\n",
+                left_ix, right_ix);
+        printf("left_lcp = %d, right_lcp = %d\n",
+                left_lcp, right_lcp);
+        printf("sarray[left_ix] = %d, sarray[right_ix] = %d\n",
+                sarray[left_ix], sarray[right_ix]);
+        assert(dist > 0);
+      }
+      fputc(1, fout);
+      fwrite(&idx, sizeof(int), 1, fout);  // Position in input.
+      fwrite(&dist, sizeof(int), 1, fout);  // Backward distance.
+    }
+  }
+}
+
+int main(int argc, char* argv[]) {
+  if (argc != 3) {
+    printf("usage: %s input_file output_file\n", argv[0]);
+    return 1;
+  }
+
+  FILE* fin = fopen(argv[1], "rb");
+  FILE* fout = fopen(argv[2], "w");
+
+  fseek(fin, 0, SEEK_END);
+  int input_size = ftell(fin);
+  fseek(fin, 0, SEEK_SET);
+  printf("The file size is %u bytes\n", input_size);
+
+  input_type* storage = new input_type[input_size];
+
+  ReadInput(fin, storage, input_size);
+  fclose(fin);
+
+  sarray_type* sarray = new sarray_type[input_size];
+  saisxx(storage, sarray, input_size);
+  printf("Suffix array calculated.\n");
+
+  // Inverse suffix array.
+  uint32_t* pos = new uint32_t[input_size];
+
+  lcp_type* lcp = new lcp_type[input_size];
+  BuildLCP(storage, sarray, lcp, input_size, pos);
+  printf("LCP array constructed.\n");
+
+  ProcessReferences(storage, sarray, lcp, input_size, pos, fout);
+  fclose(fout);
+  return 0;
+}
--- a/research/read_dist.h
+++ b/research/read_dist.h
@ -0,0 +1,46 @@
+/* Copyright 2016 Google Inc. All Rights Reserved.
+   Author: vanickulin@google.com (Ivan Nikulin)
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* API for reading distances from *.dist file.
+   The format of *.dist file is as follows: for each backward reference there is
+   a position-distance pair, also a copy length may be specified. Copy length is
+   prefixed with flag byte 0, position-distance pair is prefixed with flag
+   byte 1. Each number is a 32-bit integer. Copy length always comes before
+   position-distance pair. Standalone copy length is allowed, in this case it is
+   ignored. */
+
+#include <cassert>
+#include <cstdio>
+
+#ifndef BROTLI_RESEARCH_READ_DIST_H_
+#define BROTLI_RESEARCH_READ_DIST_H_
+
+/* Reads backwards reference from .dist file. Sets all missing fields to -1.
+   Returns false when EOF is met or input is corrupt. */
+bool ReadBackwardReference(FILE* fin, int* copy, int* pos, int* dist) {
+  int c = getc(fin);
+  if (c == EOF) return false;
+  if (c == 0) {
+    assert(fread(copy, sizeof(int), 1, fin) == 1);
+    if ((c = getc(fin)) != 1) {
+      ungetc(c, fin);
+      *pos = *dist = -1;
+    } else {
+      assert(fread(pos, sizeof(int), 1, fin) == 1);
+      assert(fread(dist, sizeof(int), 1, fin) == 1);
+    }
+  } else if (c != 1) {
+    return false;
+  } else {
+    assert(fread(pos, sizeof(int), 1, fin) == 1);
+    assert(fread(dist, sizeof(int), 1, fin) == 1);
+    *copy = -1;
+  }
+  return true;
+}
+
+#endif  /* BROTLI_RESEARCH_READ_DIST_H_ */
--- a/research/sais.hxx
+++ b/research/sais.hxx
@ -0,0 +1,364 @@
+/*
+ * sais.hxx for sais-lite
+ * Copyright (c) 2008-2009 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _SAIS_HXX
+#define _SAIS_HXX 1
+#ifdef __cplusplus
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable : 383 981 1418)
+// for icc 64-bit
+//#define __builtin_vsnprintf(a, b, c, d) __builtin_vsnprintf(a, b, c, (char *)d)
+#endif
+
+#include <iterator>
+#ifdef _OPENMP
+# include <omp.h>
+#endif
+
+namespace saisxx_private {
+
+/* find the start or end of each bucket */
+template<typename string_type, typename bucket_type, typename index_type>
+void
+getCounts(const string_type T, bucket_type C, index_type n, index_type k) {
+#ifdef _OPENMP
+  bucket_type D;
+  index_type i, j, p, sum, first, last;
+  int thnum, maxthreads = omp_get_max_threads();
+#pragma omp parallel default(shared) private(D, i, thnum, first, last)
+  {
+    thnum = omp_get_thread_num();
+    D = C + thnum * k;
+    first = n / maxthreads * thnum;
+    last = (thnum < (maxthreads - 1)) ? n / maxthreads * (thnum + 1) : n;
+    for(i = 0; i < k; ++i) { D[i] = 0; }
+    for(i = first; i < last; ++i) { ++D[T[i]]; }
+  }
+  if(1 < maxthreads) {
+#pragma omp parallel for default(shared) private(i, j, p, sum)
+    for(i = 0; i < k; ++i) {
+      for(j = 1, p = i + k, sum = C[i]; j < maxthreads; ++j, p += k) {
+        sum += C[p];
+      }
+      C[i] = sum;
+    }
+  }
+#else
+  index_type i;
+  for(i = 0; i < k; ++i) { C[i] = 0; }
+  for(i = 0; i < n; ++i) { ++C[T[i]]; }
+#endif
+}
+template<typename bucket_type, typename index_type>
+void
+getBuckets(const bucket_type C, bucket_type B, index_type k, bool end) {
+  index_type i, sum = 0;
+  if(end) { for(i = 0; i < k; ++i) { sum += C[i]; B[i] = sum; } }
+  else { for(i = 0; i < k; ++i) { sum += C[i]; B[i] = sum - C[i]; } }
+}
+
+/* compute SA and BWT */
+template<typename string_type, typename sarray_type,
+         typename bucket_type, typename index_type>
+void
+induceSA(string_type T, sarray_type SA, bucket_type C, bucket_type B,
+         index_type n, index_type k) {
+typedef typename std::iterator_traits<string_type>::value_type char_type;
+  sarray_type b;
+  index_type i, j;
+  char_type c0, c1;
+  /* compute SAl */
+  if(C == B) { getCounts(T, C, n, k); }
+  getBuckets(C, B, k, false); /* find starts of buckets */
+  b = SA + B[c1 = T[j = n - 1]];
+  *b++ = ((0 < j) && (T[j - 1] < c1)) ? ~j : j;
+  for(i = 0; i < n; ++i) {
+    j = SA[i], SA[i] = ~j;
+    if(0 < j) {
+      if((c0 = T[--j]) != c1) { B[c1] = b - SA; b = SA + B[c1 = c0]; }
+      *b++ = ((0 < j) && (T[j - 1] < c1)) ? ~j : j;
+    }
+  }
+  /* compute SAs */
+  if(C == B) { getCounts(T, C, n, k); }
+  getBuckets(C, B, k, true); /* find ends of buckets */
+  for(i = n - 1, b = SA + B[c1 = 0]; 0 <= i; --i) {
+    if(0 < (j = SA[i])) {
+      if((c0 = T[--j]) != c1) { B[c1] = b - SA; b = SA + B[c1 = c0]; }
+      *--b = ((j == 0) || (T[j - 1] > c1)) ? ~j : j;
+    } else {
+      SA[i] = ~j;
+    }
+  }
+}
+template<typename string_type, typename sarray_type,
+         typename bucket_type, typename index_type>
+int
+computeBWT(string_type T, sarray_type SA, bucket_type C, bucket_type B,
+           index_type n, index_type k) {
+typedef typename std::iterator_traits<string_type>::value_type char_type;
+  sarray_type b;
+  index_type i, j, pidx = -1;
+  char_type c0, c1;
+  /* compute SAl */
+  if(C == B) { getCounts(T, C, n, k); }
+  getBuckets(C, B, k, false); /* find starts of buckets */
+  b = SA + B[c1 = T[j = n - 1]];
+  *b++ = ((0 < j) && (T[j - 1] < c1)) ? ~j : j;
+  for(i = 0; i < n; ++i) {
+    if(0 < (j = SA[i])) {
+      SA[i] = ~(c0 = T[--j]);
+      if(c0 != c1) { B[c1] = b - SA; b = SA + B[c1 = c0]; }
+      *b++ = ((0 < j) && (T[j - 1] < c1)) ? ~j : j;
+    } else if(j != 0) {
+      SA[i] = ~j;
+    }
+  }
+  /* compute SAs */
+  if(C == B) { getCounts(T, C, n, k); }
+  getBuckets(C, B, k, true); /* find ends of buckets */
+  for(i = n - 1, b = SA + B[c1 = 0]; 0 <= i; --i) {
+    if(0 < (j = SA[i])) {
+      SA[i] = (c0 = T[--j]);
+      if(c0 != c1) { B[c1] = b - SA; b = SA + B[c1 = c0]; }
+      *--b = ((0 < j) && (T[j - 1] > c1)) ? ~((index_type)T[j - 1]) : j;
+    } else if(j != 0) {
+      SA[i] = ~j;
+    } else {
+      pidx = i;
+    }
+  }
+  return pidx;
+}
+
+/* find the suffix array SA of T[0..n-1] in {0..k}^n
+   use a working space (excluding s and SA) of at most 2n+O(1) for a constant alphabet */
+template<typename string_type, typename sarray_type, typename index_type>
+int
+suffixsort(string_type T, sarray_type SA,
+           index_type fs, index_type n, index_type k,
+           bool isbwt) {
+typedef typename std::iterator_traits<string_type>::value_type char_type;
+  sarray_type RA;
+  index_type i, j, m, p, q, plen, qlen, name, pidx = 0;
+  bool diff;
+  int c;
+#ifdef _OPENMP
+  int maxthreads = omp_get_max_threads();
+#else
+# define maxthreads 1
+#endif
+  char_type c0, c1;
+
+  /* stage 1: reduce the problem by at least 1/2
+     sort all the S-substrings */
+  if(fs < (maxthreads * k)) {
+    index_type *C, *B;
+    if((C = new index_type[maxthreads * k]) == 0) { return -2; }
+    B = (1 < maxthreads) ? C + k : C;
+    getCounts(T, C, n, k); getBuckets(C, B, k, true); /* find ends of buckets */
+#ifdef _OPENMP
+#pragma omp parallel for default(shared) private(i)
+#endif
+    for(i = 0; i < n; ++i) { SA[i] = 0; }
+    for(i = n - 2, c = 0, c1 = T[n - 1]; 0 <= i; --i, c1 = c0) {
+      if((c0 = T[i]) < (c1 + c)) { c = 1; }
+      else if(c != 0) { SA[--B[c1]] = i + 1, c = 0; }
+    }
+    induceSA(T, SA, C, B, n, k);
+    delete [] C;
+  } else {
+    sarray_type C, B;
+    C = SA + n;
+    B = ((1 < maxthreads) || (k <= (fs - k))) ? C + k : C;
+    getCounts(T, C, n, k); getBuckets(C, B, k, true); /* find ends of buckets */
+#ifdef _OPENMP
+#pragma omp parallel for default(shared) private(i)
+#endif
+    for(i = 0; i < n; ++i) { SA[i] = 0; }
+    for(i = n - 2, c = 0, c1 = T[n - 1]; 0 <= i; --i, c1 = c0) {
+      if((c0 = T[i]) < (c1 + c)) { c = 1; }
+      else if(c != 0) { SA[--B[c1]] = i + 1, c = 0; }
+    }
+    induceSA(T, SA, C, B, n, k);
+  }
+
+  /* compact all the sorted substrings into the first m items of SA
+     2*m must be not larger than n (proveable) */
+#ifdef _OPENMP
+#pragma omp parallel for default(shared) private(i, j, p, c0, c1)
+  for(i = 0; i < n; ++i) {
+    p = SA[i];
+    if((0 < p) && (T[p - 1] > (c0 = T[p]))) {
+      for(j = p + 1; (j < n) && (c0 == (c1 = T[j])); ++j) { }
+      if((j < n) && (c0 < c1)) { SA[i] = ~p; }
+    }
+  }
+  for(i = 0, m = 0; i < n; ++i) { if((p = SA[i]) < 0) { SA[m++] = ~p; } }
+#else
+  for(i = 0, m = 0; i < n; ++i) {
+    p = SA[i];
+    if((0 < p) && (T[p - 1] > (c0 = T[p]))) {
+      for(j = p + 1; (j < n) && (c0 == (c1 = T[j])); ++j) { }
+      if((j < n) && (c0 < c1)) { SA[m++] = p; }
+    }
+  }
+#endif
+  j = m + (n >> 1);
+#ifdef _OPENMP
+#pragma omp parallel for default(shared) private(i)
+#endif
+  for(i = m; i < j; ++i) { SA[i] = 0; } /* init the name array buffer */
+  /* store the length of all substrings */
+  for(i = n - 2, j = n, c = 0, c1 = T[n - 1]; 0 <= i; --i, c1 = c0) {
+    if((c0 = T[i]) < (c1 + c)) { c = 1; }
+    else if(c != 0) { SA[m + ((i + 1) >> 1)] = j - i - 1; j = i + 1; c = 0; }
+  }
+  /* find the lexicographic names of all substrings */
+  for(i = 0, name = 0, q = n, qlen = 0; i < m; ++i) {
+    p = SA[i], plen = SA[m + (p >> 1)], diff = true;
+    if(plen == qlen) {
+      for(j = 0; (j < plen) && (T[p + j] == T[q + j]); ++j) { }
+      if(j == plen) { diff = false; }
+    }
+    if(diff != false) { ++name, q = p, qlen = plen; }
+    SA[m + (p >> 1)] = name;
+  }
+
+  /* stage 2: solve the reduced problem
+     recurse if names are not yet unique */
+  if(name < m) {
+    RA = SA + n + fs - m;
+    for(i = m + (n >> 1) - 1, j = m - 1; m <= i; --i) {
+      if(SA[i] != 0) { RA[j--] = SA[i] - 1; }
+    }
+    if(suffixsort(RA, SA, fs + n - m * 2, m, name, false) != 0) { return -2; }
+    for(i = n - 2, j = m - 1, c = 0, c1 = T[n - 1]; 0 <= i; --i, c1 = c0) {
+      if((c0 = T[i]) < (c1 + c)) { c = 1; }
+      else if(c != 0) { RA[j--] = i + 1, c = 0; } /* get p1 */
+    }
+#ifdef _OPENMP
+#pragma omp parallel for default(shared) private(i)
+#endif
+    for(i = 0; i < m; ++i) { SA[i] = RA[SA[i]]; } /* get index in s */
+  }
+
+  /* stage 3: induce the result for the original problem */
+  if(fs < (maxthreads * k)) {
+    index_type *B, *C;
+    if((C = new index_type[maxthreads * k]) == 0) { return -2; }
+    B = (1 < maxthreads) ? C + k : C;
+    /* put all left-most S characters into their buckets */
+    getCounts(T, C, n, k); getBuckets(C, B, k, true); /* find ends of buckets */
+#ifdef _OPENMP
+#pragma omp parallel for default(shared) private(i)
+#endif
+    for(i = m; i < n; ++i) { SA[i] = 0; } /* init SA[m..n-1] */
+    for(i = m - 1; 0 <= i; --i) {
+      j = SA[i], SA[i] = 0;
+      SA[--B[T[j]]] = j;
+    }
+    if(isbwt == false) { induceSA(T, SA, C, B, n, k); }
+    else { pidx = computeBWT(T, SA, C, B, n, k); }
+    delete [] C;
+  } else {
+    sarray_type C, B;
+    C = SA + n;
+    B = ((1 < maxthreads) || (k <= (fs - k))) ? C + k : C;
+    /* put all left-most S characters into their buckets */
+    getCounts(T, C, n, k); getBuckets(C, B, k, true); /* find ends of buckets */
+#ifdef _OPENMP
+#pragma omp parallel for default(shared) private(i)
+#endif
+    for(i = m; i < n; ++i) { SA[i] = 0; } /* init SA[m..n-1] */
+    for(i = m - 1; 0 <= i; --i) {
+      j = SA[i], SA[i] = 0;
+      SA[--B[T[j]]] = j;
+    }
+    if(isbwt == false) { induceSA(T, SA, C, B, n, k); }
+    else { pidx = computeBWT(T, SA, C, B, n, k); }
+  }
+
+  return pidx;
+#ifndef _OPENMP
+# undef maxthreads
+#endif
+}
+
+} /* namespace saisxx_private */
+
+
+/**
+ * @brief Constructs the suffix array of a given string in linear time.
+ * @param T[0..n-1] The input string. (random access iterator)
+ * @param SA[0..n-1] The output array of suffixes. (random access iterator)
+ * @param n The length of the given string.
+ * @param k The alphabet size.
+ * @return 0 if no error occurred, -1 or -2 otherwise.
+ */
+template<typename string_type, typename sarray_type, typename index_type>
+int
+saisxx(string_type T, sarray_type SA, index_type n, index_type k = 256) {
+  int err;
+  if((n < 0) || (k <= 0)) { return -1; }
+  if(n <= 1) { if(n == 1) { SA[0] = 0; } return 0; }
+  try { err = saisxx_private::suffixsort(T, SA, 0, n, k, false); }
+  catch(...) { err = -2; }
+  return err;
+}
+
+/**
+ * @brief Constructs the burrows-wheeler transformed string of a given string in linear time.
+ * @param T[0..n-1] The input string. (random access iterator)
+ * @param U[0..n-1] The output string. (random access iterator)
+ * @param A[0..n-1] The temporary array. (random access iterator)
+ * @param n The length of the given string.
+ * @param k The alphabet size.
+ * @return The primary index if no error occurred, -1 or -2 otherwise.
+ */
+template<typename string_type, typename sarray_type, typename index_type>
+index_type
+saisxx_bwt(string_type T, string_type U, sarray_type A, index_type n, index_type k = 256) {
+typedef typename std::iterator_traits<string_type>::value_type char_type;
+  index_type i, pidx;
+  if((n < 0) || (k <= 0)) { return -1; }
+  if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; }
+  try {
+    pidx = saisxx_private::suffixsort(T, A, 0, n, k, true);
+    if(0 <= pidx) {
+      U[0] = T[n - 1];
+      for(i = 0; i < pidx; ++i) { U[i + 1] = (char_type)A[i]; }
+      for(i += 1; i < n; ++i) { U[i] = (char_type)A[i]; }
+      pidx += 1;
+    }
+  } catch(...) { pidx = -2; }
+  return pidx;
+}
+
+
+#endif /* __cplusplus */
+#endif /* _SAIS_HXX */