Add partial_tukey and punchout_tukey apodization functions

Adds two new apodization functions that seem to perform better than the apodization functions currently in the codebase and fixes three existing windows as well. It's important to note that this patch only affects the encoder stage that evaluates various possible predictors. Audio encoded with these new windows will still decode with existing legacy decoders. = Theory = These functions are used to window the audio data at the predictor stage. These new functions enable the use of only part of the signal to generate a predictor. This helps because short transients can introduce noise into the predictor. The predictor becomes very good at predicting one part of the signal, instead of mediocre for the whole block. Signed-off-by: Erik de Castro Lopo <erikd@mega-nerd.com>
2014-08-10 10:59:29 +02:00 · 2014-08-10 10:59:29 +02:00 · 29a28338c3
commit 29a28338c3
parent ffa55423e0
8 changed files with 152 additions and 12 deletions
--- a/doc/html/documentation_tools_flac.html
+++ b/doc/html/documentation_tools_flac.html
@ -852,10 +852,11 @@
 					<span class="argument">-A "function"</span>, <span class="argument">--apodization="function"</span>
 				</td>
 				<td>
-					Window audio data with given the apodization function.  The functions are: bartlett, bartlett_hann, blackman, blackman_harris_4term_92db, connes, flattop, gauss(STDDEV), hamming, hann, kaiser_bessel, nuttall, rectangle, triangle, tukey(P), welch.<br />
+					Window audio data with the given apodization function.  The functions are: bartlett, bartlett_hann, blackman, blackman_harris_4term_92db, connes, flattop, gauss(STDDEV), hamming, hann, kaiser_bessel, nuttall, rectangle, triangle, tukey(P), partial_tukey(n[/ov[/P]]), punchout_tukey(n[/ov[/P]]), welch.<br />
 					For gauss(STDDEV), STDDEV is the standard deviation (0&lt;STDDEV&lt;=0.5).<br />
 					For tukey(P), P specifies the fraction of the window that is tapered (0&lt;=P&lt;=1; P=0 corresponds to "rectangle" and P=1 corresponds to "hann").<br />
-					Please note that for both P as well as STDDEV, the use of a point or comma as decimal separator is locale-dependent.<br />
+					For partial_tukey(n) and punchout_tukey(n), n apodization functions are added that span different parts of each block. Values of 2 to 6 seem to yield sane results. If necessary, an overlap can be specified, as can be the taper parameter, for example partial_tukey(2/0.2) or partial_tukey(2/0.2/0.5). ov should be smaller than 1 and can be negative.<br />
+					Please note that P, STDDEV and ov are locale specific, so a comma as decimal separator might be required instead of a dot.<br />
 					More than one -A option (up to 32) may be used.  Any function that is specified erroneously is silently dropped.  The encoder chooses suitable defaults in the absence of any -A options; any -A option specified replaces the default(s).<br />
 					When more than one function is specified, then for every subframe the encoder will try each of them separately and choose the window that results in the smallest compressed subframe.  Multiple functions can greatly increase the encoding time.<br />
 				</td>
--- a/include/FLAC/stream_encoder.h
+++ b/include/FLAC/stream_encoder.h
@ -920,7 +920,8 @@ FLAC_API FLAC__bool FLAC__stream_encoder_set_loose_mid_side_stereo(FLAC__StreamE
 * The available functions are \c bartlett, \c bartlett_hann,
 * \c blackman, \c blackman_harris_4term_92db, \c connes, \c flattop,
 * \c gauss(STDDEV), \c hamming, \c hann, \c kaiser_bessel, \c nuttall,
- * \c rectangle, \c triangle, \c tukey(P), \c welch.
+ * \c rectangle, \c triangle, \c tukey(P), \c partial_tukey(n[/ov[/P]]),
+ * \c punchout_tukey(n[/ov[/P]]), \c welch.
 *
 * For \c gauss(STDDEV), STDDEV specifies the standard deviation
 * (0<STDDEV<=0.5).
@ -929,6 +930,24 @@ FLAC_API FLAC__bool FLAC__stream_encoder_set_loose_mid_side_stereo(FLAC__StreamE
 * tapered (0<=P<=1).  P=0 corresponds to \c rectangle and P=1
 * corresponds to \c hann.
 *
+ * Specifying \c partial_tukey or \c punchout_tukey works a little
+ * differently. These do not specify a single apodization function, but
+ * a series of them with some overlap. partial_tukey specifies a series
+ * of small windows (all treated separately) while punchout_tukey
+ * specifies a series of windows that have a hole in them. In this way,
+ * the predictor is constructed with only a part of the block, which
+ * helps in case a block consists of dissimilar parts.
+ *
+ * The three parameters that can be specified for the functions are
+ * n, ov and P. n is the number of functions to add, ov is the overlap
+ * of the windows in case of partial_tukey and the overlap in the gaps
+ * in case of punchout_tukey. P is the fraction of the window that is
+ * tapered, like with a regular tukey window. The function can be
+ * specified with only a number, a number and an overlap, or a number,
+ * an overlap and a P, for example, partial_tukey(3), partial_tukey(3/0.3)
+ * and partial_tukey(3/0.3/0.5) are all valid. ov should be smaller than 1
+ * and can be negative.
+ *
 * Example specifications are \c "blackman" or
 * \c "hann;triangle;tukey(0.5);tukey(0.25);tukey(0.125)"
 *
@ -941,7 +960,9 @@ FLAC_API FLAC__bool FLAC__stream_encoder_set_loose_mid_side_stereo(FLAC__StreamE
 * results in the smallest compressed subframe.
 *
 * Note that each function specified causes the encoder to occupy a
- * floating point array in which to store the window.
+ * floating point array in which to store the window. Also note that the
+ * values of P, STDDEV and ov are locale-specific, so if the decimal
+ * separator specified by the locale is a comma, a comma should be used.
 *
 * \default \c "tukey(0.5)"
 * \param  encoder        An encoder instance to set.
--- a/man/flac.1
+++ b/man/flac.1
@ -280,12 +280,16 @@ Highest compression.  Currently synonymous with -8.
 Do exhaustive model search (expensive!)
 .TP
 \fB-A \fIfunction\fB, --apodization=\fIfunction\fB\fR
-Window audio data with given the apodization function.  The functions are: bartlett, bartlett_hann, blackman, blackman_harris_4term_92db, connes, flattop, gauss(STDDEV), hamming, hann, kaiser_bessel, nuttall, rectangle, triangle, tukey(P), welch.
+Window audio data with the given apodization function.  The functions are: bartlett, bartlett_hann, blackman, blackman_harris_4term_92db, connes, flattop, gauss(STDDEV), hamming, hann, kaiser_bessel, nuttall, rectangle, triangle, tukey(P), partial_tukey(n[/ov[/P]]), punchout_tukey(n[/ov[/P]]), welch.

 For gauss(STDDEV), STDDEV is the standard deviation (0<STDDEV<=0.5).

 For tukey(P), P specifies the fraction of the window that is tapered (0<=P<=1; P=0 corresponds to "rectangle" and P=1 corresponds to "hann").

+For partial_tukey(n) and punchout_tukey(n), n apodization functions are added that span different parts of each block. Values of 2 to 6 seem to yield sane results. If necessary, an overlap can be specified, as can be the taper parameter, for example partial_tukey(2/0.2) or partial_tukey(2/0.2/0.5). ov should be smaller than 1 and can be negative.
+
+Please note that P, STDDEV and ov are locale specific, so a comma as decimal separator might be required instead of a dot.
+
 More than one -A option (up to 32) may be used.  Any function that is specified erroneously is silently dropped.  The encoder chooses suitable defaults in the absence of any -A options; any -A option specified replaces the default(s).

 When more than one function is specified, then for every subframe the encoder will try each of them separately and choose the window that results in the smallest compressed subframe.  Multiple functions can greatly increase the encoding time.
--- a/man/flac.sgml
+++ b/man/flac.sgml
@ -612,9 +612,11 @@
 	  <term><option>-A</option> <replaceable>function</replaceable>, <option>--apodization</option>=<replaceable>function</replaceable></term>

 	  <listitem>
-	    <para>Window audio data with given the apodization function.  The functions are: bartlett, bartlett_hann, blackman, blackman_harris_4term_92db, connes, flattop, gauss(STDDEV), hamming, hann, kaiser_bessel, nuttall, rectangle, triangle, tukey(P), welch.</para>
+	    <para>Window audio data with the given apodization function.  The functions are: bartlett, bartlett_hann, blackman, blackman_harris_4term_92db, connes, flattop, gauss(STDDEV), hamming, hann, kaiser_bessel, nuttall, rectangle, triangle, tukey(P), partial_tukey(n[/ov[/P]]), punchout_tukey(n[/ov[/P]]), welch.</para>
 	    <para>For gauss(STDDEV), STDDEV is the standard deviation (0&lt;STDDEV&lt;=0.5).</para>
 	    <para>For tukey(P), P specifies the fraction of the window that is tapered (0&lt;=P&lt;=1; P=0 corresponds to "rectangle" and P=1 corresponds to "hann").</para>
+	    <para>For partial_tukey(n) and punchout_tukey(n), n apodization functions are added that span different parts of each block. Values of 2 to 6 seem to yield sane results. If necessary, an overlap can be specified, as can be the taper parameter, for example partial_tukey(2/0.2) or partial_tukey(2/0.2/0.5). ov should be smaller than 1 and can be negative.</para>
+	    <para>Please note that P, STDDEV and ov are locale specific, so a comma as decimal separator might be required instead of a dot.</para>
 	    <para>More than one -A option (up to 32) may be used.  Any function that is specified erroneously is silently dropped.  The encoder chooses suitable defaults in the absence of any -A options; any -A option specified replaces the default(s).</para>
 	    <para>When more than one function is specified, then for every subframe the encoder will try each of them separately and choose the window that results in the smallest compressed subframe.  Multiple functions can greatly increase the encoding time.</para>
 	  </listitem>
--- a/src/libFLAC/include/private/window.h
+++ b/src/libFLAC/include/private/window.h
@ -65,6 +65,8 @@ void FLAC__window_nuttall(FLAC__real *window, const FLAC__int32 L);
 void FLAC__window_rectangle(FLAC__real *window, const FLAC__int32 L);
 void FLAC__window_triangle(FLAC__real *window, const FLAC__int32 L);
 void FLAC__window_tukey(FLAC__real *window, const FLAC__int32 L, const FLAC__real p);
+void FLAC__window_partial_tukey(FLAC__real *window, const FLAC__int32 L, const FLAC__real p, const FLAC__real start, const FLAC__real end);
+void FLAC__window_punchout_tukey(FLAC__real *window, const FLAC__int32 L, const FLAC__real p, const FLAC__real start, const FLAC__real end);
 void FLAC__window_welch(FLAC__real *window, const FLAC__int32 L);

 #endif /* !defined FLAC__INTEGER_ONLY_LIBRARY */
--- a/src/libFLAC/include/protected/stream_encoder.h
+++ b/src/libFLAC/include/protected/stream_encoder.h
@ -59,6 +59,8 @@ typedef enum {
 	FLAC__APODIZATION_RECTANGLE,
 	FLAC__APODIZATION_TRIANGLE,
 	FLAC__APODIZATION_TUKEY,
+	FLAC__APODIZATION_PARTIAL_TUKEY,
+	FLAC__APODIZATION_PUNCHOUT_TUKEY,
 	FLAC__APODIZATION_WELCH
 } FLAC__ApodizationFunction;

@ -71,6 +73,11 @@ typedef struct {
 		struct {
 			FLAC__real p;
 		} tukey;
+		struct {
+			FLAC__real p;
+			FLAC__real start;
+			FLAC__real end;
+		} multiple_tukey;
 	} parameters;
 } FLAC__ApodizationSpecification;

--- a/src/libFLAC/stream_encoder.c
+++ b/src/libFLAC/stream_encoder.c
@ -1664,6 +1664,48 @@ FLAC_API FLAC__bool FLAC__stream_encoder_set_apodization(FLAC__StreamEncoder *en
 				encoder->protected_->apodizations[encoder->protected_->num_apodizations++].type = FLAC__APODIZATION_TUKEY;
 			}
 		}
+		else if(n>15   && 0 == strncmp("partial_tukey("       , specification, 14)) {
+			FLAC__int32 tukey_parts = (FLAC__int32)strtod(specification+14, 0);
+			const char *si_1 = strchr(specification, '/');
+			FLAC__real overlap = si_1?flac_min((FLAC__real)strtod(si_1+1, 0),0.99f):0.1f;
+			FLAC__real overlap_units = 1.0f/(1.0f - overlap) - 1.0f;
+			const char *si_2 = strchr((si_1?(si_1+1):specification), '/');
+			FLAC__real tukey_p = si_2?(FLAC__real)strtod(si_2+1, 0):0.2f;
+
+			if (tukey_parts <= 1) {
+				encoder->protected_->apodizations[encoder->protected_->num_apodizations].parameters.tukey.p = tukey_p;
+				encoder->protected_->apodizations[encoder->protected_->num_apodizations++].type = FLAC__APODIZATION_TUKEY;
+			}else if (encoder->protected_->num_apodizations + tukey_parts < 32){
+				FLAC__int32 m;
+				for(m = 0; m < tukey_parts; m++){
+					encoder->protected_->apodizations[encoder->protected_->num_apodizations].parameters.multiple_tukey.p = tukey_p;
+					encoder->protected_->apodizations[encoder->protected_->num_apodizations].parameters.multiple_tukey.start = m/(tukey_parts+overlap_units);
+					encoder->protected_->apodizations[encoder->protected_->num_apodizations].parameters.multiple_tukey.end = (m+1+overlap_units)/(tukey_parts+overlap_units);
+					encoder->protected_->apodizations[encoder->protected_->num_apodizations++].type = FLAC__APODIZATION_PARTIAL_TUKEY;
+				}
+			}
+		}
+		else if(n>16   && 0 == strncmp("punchout_tukey("       , specification, 15)) {
+			FLAC__int32 tukey_parts = (FLAC__int32)strtod(specification+15, 0);
+			const char *si_1 = strchr(specification, '/');
+			FLAC__real overlap = si_1?flac_min((FLAC__real)strtod(si_1+1, 0),0.99f):0.2f;
+			FLAC__real overlap_units = 1.0f/(1.0f - overlap) - 1.0f;
+			const char *si_2 = strchr((si_1?(si_1+1):specification), '/');
+			FLAC__real tukey_p = si_2?(FLAC__real)strtod(si_2+1, 0):0.2f;
+
+			if (tukey_parts <= 1) {
+				encoder->protected_->apodizations[encoder->protected_->num_apodizations].parameters.tukey.p = tukey_p;
+				encoder->protected_->apodizations[encoder->protected_->num_apodizations++].type = FLAC__APODIZATION_TUKEY;
+			}else if (encoder->protected_->num_apodizations + tukey_parts < 32){
+				FLAC__int32 m;
+				for(m = 0; m < tukey_parts; m++){
+					encoder->protected_->apodizations[encoder->protected_->num_apodizations].parameters.multiple_tukey.p = tukey_p;
+					encoder->protected_->apodizations[encoder->protected_->num_apodizations].parameters.multiple_tukey.start = m/(tukey_parts+overlap_units);
+					encoder->protected_->apodizations[encoder->protected_->num_apodizations].parameters.multiple_tukey.end = (m+1+overlap_units)/(tukey_parts+overlap_units);
+					encoder->protected_->apodizations[encoder->protected_->num_apodizations++].type = FLAC__APODIZATION_PUNCHOUT_TUKEY;
+				}
+			}
+		}
 		else if(n==5  && 0 == strncmp("welch"        , specification, n))
 			encoder->protected_->apodizations[encoder->protected_->num_apodizations++].type = FLAC__APODIZATION_WELCH;
 		if (encoder->protected_->num_apodizations == 32)
@ -2443,6 +2485,12 @@ FLAC__bool resize_buffers_(FLAC__StreamEncoder *encoder, unsigned new_blocksize)
 				case FLAC__APODIZATION_TUKEY:
 					FLAC__window_tukey(encoder->private_->window[i], new_blocksize, encoder->protected_->apodizations[i].parameters.tukey.p);
 					break;
+				case FLAC__APODIZATION_PARTIAL_TUKEY:
+					FLAC__window_partial_tukey(encoder->private_->window[i], new_blocksize, encoder->protected_->apodizations[i].parameters.multiple_tukey.p, encoder->protected_->apodizations[i].parameters.multiple_tukey.start, encoder->protected_->apodizations[i].parameters.multiple_tukey.end);
+					break;
+				case FLAC__APODIZATION_PUNCHOUT_TUKEY:
+					FLAC__window_punchout_tukey(encoder->private_->window[i], new_blocksize, encoder->protected_->apodizations[i].parameters.multiple_tukey.p, encoder->protected_->apodizations[i].parameters.multiple_tukey.start, encoder->protected_->apodizations[i].parameters.multiple_tukey.end);
+					break;
 				case FLAC__APODIZATION_WELCH:
 					FLAC__window_welch(encoder->private_->window[i], new_blocksize);
 					break;
--- a/src/libFLAC/window.c
+++ b/src/libFLAC/window.c
@ -58,7 +58,7 @@ void FLAC__window_bartlett(FLAC__real *window, const FLAC__int32 L)
 		for (n = 0; n <= L/2-1; n++)
 			window[n] = 2.0f * n / (float)N;
 		for (; n <= N; n++)
-			window[n] = 2.0f - 2.0f * (N-n) / (float)N;
+			window[n] = 2.0f - 2.0f * n / (float)N;
 	}
 }

@ -68,7 +68,7 @@ void FLAC__window_bartlett_hann(FLAC__real *window, const FLAC__int32 L)
 	FLAC__int32 n;

 	for (n = 0; n < L; n++)
-		window[n] = (FLAC__real)(0.62f - 0.48f * fabs((float)n/(float)N+0.5f) + 0.38f * cos(2.0f * M_PI * ((float)n/(float)N+0.5f)));
+		window[n] = (FLAC__real)(0.62f - 0.48f * fabs((float)n/(float)N-0.5f) - 0.38f * cos(2.0f * M_PI * ((float)n/(float)N)));
 }

 void FLAC__window_blackman(FLAC__real *window, const FLAC__int32 L)
@ -173,16 +173,16 @@ void FLAC__window_triangle(FLAC__real *window, const FLAC__int32 L)
 	FLAC__int32 n;

 	if (L & 1) {
-		for (n = 1; n <= L+1/2; n++)
+		for (n = 1; n <= (L+1)/2; n++)
 			window[n-1] = 2.0f * n / ((float)L + 1.0f);
 		for (; n <= L; n++)
-			window[n-1] = - (float)(2 * (L - n + 1)) / ((float)L + 1.0f);
+			window[n-1] = (float)(2 * (L - n + 1)) / ((float)L + 1.0f);
 	}
 	else {
 		for (n = 1; n <= L/2; n++)
-			window[n-1] = 2.0f * n / (float)L;
+			window[n-1] = 2.0f * n / ((float)L + 1.0f);
 		for (; n <= L; n++)
-			window[n-1] = ((float)(2 * (L - n)) + 1.0f) / (float)L;
+			window[n-1] = (float)(2 * (L - n + 1)) / ((float)L + 1.0f);
 	}
 }

@ -207,6 +207,61 @@ void FLAC__window_tukey(FLAC__real *window, const FLAC__int32 L, const FLAC__rea
 	}
 }

+/* Fills window[0..L-1] with a Tukey (tapered cosine) window that covers only
+ * the part of the block between the fractions start and end; every sample
+ * outside that span is set to zero.
+ *
+ *   window  output buffer holding at least L values
+ *   L       block length in samples
+ *   p       fraction of the partial window that is tapered; values <= 0 are
+ *           replaced by 0.01 and values >= 1 by 1
+ *   start   start of the partial window, as a fraction of the block
+ *   end     end of the partial window, as a fraction of the block
+ */
+void FLAC__window_partial_tukey(FLAC__real *window, const FLAC__int32 L, const FLAC__real p, const FLAC__real start, const FLAC__real end)
+{
+	/* Clamp p locally rather than calling this function again with a
+	 * corrected value: the window is then written exactly once and an
+	 * out-of-range p can never lead to unbounded recursion.  The comparison
+	 * order also maps a NaN to the lower bound. */
+	const FLAC__real taper = (p > 0.0f) ? ((p < 1.0f) ? p : 1.0f) : 0.01f;
+	FLAC__int32 start_n = (FLAC__int32)(start * L);
+	FLAC__int32 end_n = (FLAC__int32)(end * L);
+	FLAC__int32 Np, n, i;
+
+	/* Keep both edges inside the buffer and in order, so that none of the
+	 * loops below can write outside window[0..L-1]. */
+	if (start_n < 0)
+		start_n = 0;
+	else if (start_n > L)
+		start_n = L;
+	if (end_n < start_n)
+		end_n = start_n;
+	else if (end_n > L)
+		end_n = L;
+
+	/* Length of each cosine taper.  For short spans the expression yields
+	 * -1, which would make the flat section run one sample past end_n; use
+	 * an untapered (rectangular) span in that case. */
+	Np = (FLAC__int32)(taper / 2.0f * (end_n - start_n)) - 1;
+	if (Np < 0)
+		Np = 0;
+
+	/* silence before the span */
+	for (n = 0; n < start_n; n++)
+		window[n] = 0.0f;
+	/* rising taper (skipped when Np == 0, so Np is never a zero divisor) */
+	for (i = 1; n < (start_n+Np); n++, i++)
+		window[n] = (FLAC__real)(0.5f - 0.5f * cos(M_PI * i / Np));
+	/* flat section */
+	for (; n < (end_n-Np); n++)
+		window[n] = 1.0f;
+	/* falling taper */
+	for (i = Np; n < end_n; n++, i--)
+		window[n] = (FLAC__real)(0.5f - 0.5f * cos(M_PI * i / Np));
+	/* silence after the span */
+	for (; n < L; n++)
+		window[n] = 0.0f;
+}
+/* Fills window[0..L-1] with a window that has a hole punched out of it: the
+ * samples between the fractions start and end are set to zero, and the parts
+ * of the block before and after the hole are each shaped as a Tukey (tapered
+ * cosine) window.
+ *
+ *   window  output buffer holding at least L values
+ *   L       block length in samples
+ *   p       fraction of each remaining part that is tapered; values <= 0
+ *           are replaced by 0.01 and values >= 1 by 1
+ *   start   start of the hole, as a fraction of the block
+ *   end     end of the hole, as a fraction of the block
+ */
+void FLAC__window_punchout_tukey(FLAC__real *window, const FLAC__int32 L, const FLAC__real p, const FLAC__real start, const FLAC__real end)
+{
+	/* Clamp p locally.  Delegating to another window function for an
+	 * out-of-range p would produce the wrong window shape and then have it
+	 * overwritten below using the unclamped value.  The comparison order
+	 * also maps a NaN to the lower bound. */
+	const FLAC__real taper = (p > 0.0f) ? ((p < 1.0f) ? p : 1.0f) : 0.01f;
+	FLAC__int32 start_n = (FLAC__int32)(start * L);
+	FLAC__int32 end_n = (FLAC__int32)(end * L);
+	FLAC__int32 Ns, Ne, n, i;
+
+	/* Keep both edges of the hole inside the buffer and in order, so that
+	 * none of the loops below can write outside window[0..L-1]. */
+	if (start_n < 0)
+		start_n = 0;
+	else if (start_n > L)
+		start_n = L;
+	if (end_n < start_n)
+		end_n = start_n;
+	else if (end_n > L)
+		end_n = L;
+
+	/* Taper lengths of the part before (Ns) and after (Ne) the hole.  With
+	 * taper <= 1 each is at most half of its part, so the rising and falling
+	 * tapers never overlap.  A length of 0 skips the taper loops entirely,
+	 * so neither value is ever used as a zero divisor. */
+	Ns = (FLAC__int32)(taper / 2.0f * start_n);
+	Ne = (FLAC__int32)(taper / 2.0f * (L - end_n));
+
+	/* part before the hole: rise, flat, fall */
+	for (n = 0, i = 1; n < Ns; n++, i++)
+		window[n] = (FLAC__real)(0.5f - 0.5f * cos(M_PI * i / Ns));
+	for (; n < start_n-Ns; n++)
+		window[n] = 1.0f;
+	for (i = Ns; n < start_n; n++, i--)
+		window[n] = (FLAC__real)(0.5f - 0.5f * cos(M_PI * i / Ns));
+	/* the hole itself */
+	for (; n < end_n; n++)
+		window[n] = 0.0f;
+	/* part after the hole: rise, flat, fall */
+	for (i = 1; n < end_n+Ne; n++, i++)
+		window[n] = (FLAC__real)(0.5f - 0.5f * cos(M_PI * i / Ne));
+	for (; n < L - (Ne); n++)
+		window[n] = 1.0f;
+	for (i = Ne; n < L; n++, i--)
+		window[n] = (FLAC__real)(0.5f - 0.5f * cos(M_PI * i / Ne));
+}
+
 void FLAC__window_welch(FLAC__real *window, const FLAC__int32 L)
 {
 	const FLAC__int32 N = L - 1;