mirror of
https://sourceware.org/git/glibc.git
synced 2025-01-16 13:44:14 +00:00
30891f35fa
We stopped adding "Contributed by" or similar lines in sources in 2012 in favour of git logs and keeping the Contributors section of the glibc manual up to date. Removing these lines makes the license header a bit more consistent across files and also removes the possibility of error in attribution when license blocks or files are copied across since the contributed-by lines don't actually reflect reality in those cases. Move all "Contributed by" and similar lines (Written by, Test by, etc.) into a new file CONTRIBUTED-BY to retain record of these contributions. These contributors are also mentioned in manual/contrib.texi, so we just maintain this additional record as a courtesy to the earlier developers. The following scripts were used to filter a list of files to edit in place and to clean up the CONTRIBUTED-BY file respectively. These were not added to the glibc sources because they're not expected to be of any use in future given that this is a one time task: https://gist.github.com/siddhesh/b5ecac94eabfd72ed2916d6d8157e7dc https://gist.github.com/siddhesh/15ea1f5e435ace9774f485030695ee02 Reviewed-by: Carlos O'Donell <carlos@redhat.com>
4488 lines
136 KiB
ArmAsm
4488 lines
136 KiB
ArmAsm
.file "tgammal.s"
|
|
|
|
|
|
// Copyright (c) 2002 - 2005, Intel Corporation
|
|
// All rights reserved.
|
|
//
|
|
//
|
|
// Redistribution and use in source and binary forms, with or without
|
|
// modification, are permitted provided that the following conditions are
|
|
// met:
|
|
//
|
|
// * Redistributions of source code must retain the above copyright
|
|
// notice, this list of conditions and the following disclaimer.
|
|
//
|
|
// * Redistributions in binary form must reproduce the above copyright
|
|
// notice, this list of conditions and the following disclaimer in the
|
|
// documentation and/or other materials provided with the distribution.
|
|
//
|
|
// * The name of Intel Corporation may not be used to endorse or promote
|
|
// products derived from this software without specific prior written
|
|
// permission.
|
|
|
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
|
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
|
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
//
|
|
// Intel Corporation is the author of this code, and requests that all
|
|
// problem reports or change requests be submitted to it directly at
|
|
// http://www.intel.com/software/products/opensource/libraries/num.htm.
|
|
//
|
|
// History
|
|
//==============================================================
|
|
// 01/16/02 Initial version
|
|
// 05/20/02 Cleaned up namespace and sf0 syntax
|
|
// 02/10/03 Reordered header: .section, .global, .proc, .align;
|
|
// used data8 for long double table values
|
|
// 03/17/03 Moved tgammal_libm_err label into .proc region
|
|
// 04/10/03 Changed error codes for overflow and negative integers
|
|
// 03/31/05 Reformatted delimiters between data tables
|
|
//
|
|
// API
|
|
//==============================================================
|
|
// long double tgammal(long double)
|
|
//
|
|
// Resources Used:
|
|
//
|
|
// Floating-Point Registers: f8-f15
|
|
// f32-f127
|
|
//
|
|
// General Purpose Registers: r32-r67
|
|
//
|
|
// Predicate Registers: p6-p15
|
|
//
|
|
//*********************************************************************
|
|
//
|
|
// IEEE Special Conditions:
|
|
//
|
|
// tgammal(+inf) = +inf
|
|
// tgammal(-inf) = QNaN
|
|
// tgammal(+/-0) = +/-inf
|
|
// tgammal(x<0, x - integer) = QNaN
|
|
// tgammal(SNaN) = QNaN
|
|
// tgammal(QNaN) = QNaN
|
|
//
|
|
//*********************************************************************
|
|
// Overview of operation
|
|
//==============================================================
|
|
//
|
|
// Algorithm description
|
|
// ---------------------
|
|
//
|
|
// There are 3 main paths in the implementation
|
|
// (and additional special values branches)
|
|
//
|
|
// 1) |X| >= 13 - Stirling formula computation
|
|
// a) Positive arguments:
|
|
// TGAMMAL(X) = exp((X-0.5)*ln(X) - X + C + S(Z)),
|
|
// where C = 0.5*ln(2*Pi) , Z = 1/Z, S(Z) - Bernulli polynomial
|
|
// (up to 'B18' term).
|
|
// Some of these calculation done in multiprecision.
|
|
// Ln returns multiprecision result too
|
|
// and exp also accepts and returns pair of values.
|
|
//
|
|
// b) Negative arguments
|
|
// TGAMMAL(-X) = PI/(X*TGAMMAL(X)*sin(PI*X)).
|
|
// (X*sin(PI*X))/PI calculated in parallel with TGAMMAL.
|
|
// Here we use polynomial of 9th degree with 2 multiprecision steps.
|
|
// Argument range reduction is:
|
|
// N = [x] with round to nearest, r = x - N, -0.5 <= r < 0.5
|
|
// After ((X-0.5)*ln(X) - X + C + S(Z)) completed we just invert
|
|
// its result and compute exp with negative argument (1/exp(x)=exp(-x))
|
|
// Then we multiply exp result to PI/(X*sin(PI*X)).
|
|
//
|
|
// 2) 1 <= |X| < 13 - Polynomial part
|
|
// a) Positive arguments:
|
|
// All values are splitted to such intervals as:
|
|
// #0->[2;3], #1->[3,4], #2->[5,6]...
|
|
// For even intervals we just use polynomial computation with degree 20
|
|
// and first 6 multiprecision computations.
|
|
// Range reduction looks like
|
|
// N = [x] with truncate, r = x - N - 0.5, -0.5 <= r < 0.5
|
|
// For odd intervals we use reccurent formula:
|
|
// TGAMMAL(X) = TGAMMA(X-1)*(X-1)
|
|
// [1;2] interval is splitted to 3 subranges:
|
|
// [1;1.25], [1.25;1.75], [1.75;2] with the same polynomial forms
|
|
//
|
|
// b) Negative arguments
|
|
// TGAMMAL(-X) = PI/(X*TGAMMAL(X)*sin(PI*X)).
|
|
// (X*sin(PI*X))/PI calculated in parallel with TGAMMAL.
|
|
// After multiplication by TGAMMAL(X) result we calculate reciprocal
|
|
// and get final result.
|
|
//
|
|
// 3) 0 < |X| < 1 - Near 0 part
|
|
// a) Here we use reccurent formula TGAMMAL(X) = TGAMMAL(X+1)/X
|
|
// TGAMMAL(X+1) calculated as shown above,
|
|
// 1/X result obtained in parallel. Then we just multiply these values.
|
|
// There is only additional separated subrange: [0;0.125] with specific
|
|
// polynomial constants set.
|
|
//
|
|
// b) Negative arguments
|
|
// TGAMMAL(-X) = PI/(TGAMMAL(X+1)*sin(PI*X)).
|
|
// There is no need to compute 1/X.
|
|
|
|
|
|
|
|
RODATA
|
|
|
|
.align 16
|
|
LOCAL_OBJECT_START(Constants_Tgammal_log_80_Q)
|
|
// log2_hi, log2_lo, Q_6, Q_5, Q_4, Q_3, Q_2, Q_1
|
|
data4 0x00000000,0xB1721800,0x00003FFE,0x00000000
|
|
data4 0x4361C4C6,0x82E30865,0x0000BFE2,0x00000000
|
|
data4 0xA51BE0AF,0x92492453,0x00003FFC,0x00000000
|
|
data4 0xA0CFD29F,0xAAAAAB73,0x0000BFFC,0x00000000
|
|
data4 0xCCCE3872,0xCCCCCCCC,0x00003FFC,0x00000000
|
|
data4 0xFFFFB4FB,0xFFFFFFFF,0x0000BFFC,0x00000000
|
|
data4 0xAAAAAAAB,0xAAAAAAAA,0x00003FFD,0x00000000
|
|
data4 0x00000000,0x80000000,0x0000BFFE,0x00000000
|
|
LOCAL_OBJECT_END(Constants_Tgammal_log_80_Q)
|
|
|
|
.align 64
|
|
LOCAL_OBJECT_START(Constants_Tgammal_log_80_Z_G_H_h1)
|
|
// Z1 - 16 bit fixed, G1 and H1 IEEE single, h1 IEEE double
|
|
data4 0x00008000,0x3F800000,0x00000000,0x00000000
|
|
data4 0x00000000,0x00000000,0x00000000,0x00000000
|
|
data4 0x00007879,0x3F70F0F0,0x3D785196,0x00000000
|
|
data4 0xEBA0E0D1,0x8B1D330B,0x00003FDA,0x00000000
|
|
data4 0x000071C8,0x3F638E38,0x3DF13843,0x00000000
|
|
data4 0x9EADD553,0xE2AF365E,0x00003FE2,0x00000000
|
|
data4 0x00006BCB,0x3F579430,0x3E2FF9A0,0x00000000
|
|
data4 0x752F34A2,0xF585FEC3,0x0000BFE3,0x00000000
|
|
data4 0x00006667,0x3F4CCCC8,0x3E647FD6,0x00000000
|
|
data4 0x893B03F3,0xF3546435,0x00003FE2,0x00000000
|
|
data4 0x00006187,0x3F430C30,0x3E8B3AE7,0x00000000
|
|
data4 0x39CDD2AC,0xBABA62E0,0x00003FE4,0x00000000
|
|
data4 0x00005D18,0x3F3A2E88,0x3EA30C68,0x00000000
|
|
data4 0x457978A1,0x8718789F,0x00003FE2,0x00000000
|
|
data4 0x0000590C,0x3F321640,0x3EB9CEC8,0x00000000
|
|
data4 0x3185E56A,0x9442DF96,0x0000BFE4,0x00000000
|
|
data4 0x00005556,0x3F2AAAA8,0x3ECF9927,0x00000000
|
|
data4 0x2BBE2CBD,0xCBF9A4BF,0x00003FE4,0x00000000
|
|
data4 0x000051EC,0x3F23D708,0x3EE47FC5,0x00000000
|
|
data4 0x852D5935,0xF3537535,0x00003FE3,0x00000000
|
|
data4 0x00004EC5,0x3F1D89D8,0x3EF8947D,0x00000000
|
|
data4 0x46CDF32F,0xA1F1E699,0x0000BFDF,0x00000000
|
|
data4 0x00004BDB,0x3F17B420,0x3F05F3A1,0x00000000
|
|
data4 0xD8484CE3,0x84A61856,0x00003FE4,0x00000000
|
|
data4 0x00004925,0x3F124920,0x3F0F4303,0x00000000
|
|
data4 0xFF28821B,0xC7DD97E0,0x0000BFE2,0x00000000
|
|
data4 0x0000469F,0x3F0D3DC8,0x3F183EBF,0x00000000
|
|
data4 0xEF1FD32F,0xD3C4A887,0x00003FE3,0x00000000
|
|
data4 0x00004445,0x3F088888,0x3F20EC80,0x00000000
|
|
data4 0x464C76DA,0x84672BE6,0x00003FE5,0x00000000
|
|
data4 0x00004211,0x3F042108,0x3F29516A,0x00000000
|
|
data4 0x18835FB9,0x9A43A511,0x0000BFE5,0x00000000
|
|
LOCAL_OBJECT_END(Constants_Tgammal_log_80_Z_G_H_h1)
|
|
|
|
.align 64
|
|
LOCAL_OBJECT_START(Constants_Tgammal_log_80_Z_G_H_h2)
|
|
// Z2 - 16 bit fixed, G2 and H2 IEEE single, h2 IEEE double
|
|
data4 0x00008000,0x3F800000,0x00000000,0x00000000
|
|
data4 0x00000000,0x00000000,0x00000000,0x00000000
|
|
data4 0x00007F81,0x3F7F00F8,0x3B7F875D,0x00000000
|
|
data4 0x211398BF,0xAD08B116,0x00003FDB,0x00000000
|
|
data4 0x00007F02,0x3F7E03F8,0x3BFF015B,0x00000000
|
|
data4 0xC376958E,0xB106790F,0x00003FDE,0x00000000
|
|
data4 0x00007E85,0x3F7D08E0,0x3C3EE393,0x00000000
|
|
data4 0x79A7679A,0xFD03F242,0x0000BFDA,0x00000000
|
|
data4 0x00007E08,0x3F7C0FC0,0x3C7E0586,0x00000000
|
|
data4 0x05E7AE08,0xF03F81C3,0x0000BFDF,0x00000000
|
|
data4 0x00007D8D,0x3F7B1880,0x3C9E75D2,0x00000000
|
|
data4 0x049EB22F,0xD1B87D3C,0x00003FDE,0x00000000
|
|
data4 0x00007D12,0x3F7A2328,0x3CBDC97A,0x00000000
|
|
data4 0x3A9E81E0,0xFABC8B95,0x00003FDF,0x00000000
|
|
data4 0x00007C98,0x3F792FB0,0x3CDCFE47,0x00000000
|
|
data4 0x7C4B5443,0xF5F3653F,0x00003FDF,0x00000000
|
|
data4 0x00007C20,0x3F783E08,0x3CFC15D0,0x00000000
|
|
data4 0xF65A1773,0xE78AB204,0x00003FE0,0x00000000
|
|
data4 0x00007BA8,0x3F774E38,0x3D0D874D,0x00000000
|
|
data4 0x7B8EF695,0xDB7CBFFF,0x0000BFE0,0x00000000
|
|
data4 0x00007B31,0x3F766038,0x3D1CF49B,0x00000000
|
|
data4 0xCF773FB3,0xC0241AEA,0x0000BFE0,0x00000000
|
|
data4 0x00007ABB,0x3F757400,0x3D2C531D,0x00000000
|
|
data4 0xC9539FDF,0xFC8F4D48,0x00003FE1,0x00000000
|
|
data4 0x00007A45,0x3F748988,0x3D3BA322,0x00000000
|
|
data4 0x954665C2,0x9CD035FB,0x0000BFE1,0x00000000
|
|
data4 0x000079D1,0x3F73A0D0,0x3D4AE46F,0x00000000
|
|
data4 0xDD367A30,0xEC9017C7,0x00003FE1,0x00000000
|
|
data4 0x0000795D,0x3F72B9D0,0x3D5A1756,0x00000000
|
|
data4 0xCB11189C,0xEE6625D3,0x0000BFE1,0x00000000
|
|
data4 0x000078EB,0x3F71D488,0x3D693B9D,0x00000000
|
|
data4 0xBE11C424,0xA49C8DB5,0x0000BFE0,0x00000000
|
|
LOCAL_OBJECT_END(Constants_Tgammal_log_80_Z_G_H_h2)
|
|
|
|
.align 64
|
|
LOCAL_OBJECT_START(Constants_Tgammal_log_80_h3_G_H)
|
|
// h3 IEEE double extended, H3 and G3 IEEE single
|
|
data4 0x112666B0,0xAAACAAB1,0x00003FD3,0x3F7FFC00
|
|
data4 0x9B7FAD21,0x90051030,0x00003FD8,0x3F7FF400
|
|
data4 0xF4D783C4,0xA6B46F46,0x00003FDA,0x3F7FEC00
|
|
data4 0x11C6DDCA,0xDA148D88,0x0000BFD8,0x3F7FE400
|
|
data4 0xCA964D95,0xCE65C1D8,0x0000BFD8,0x3F7FDC00
|
|
data4 0x23412D13,0x883838EE,0x0000BFDB,0x3F7FD400
|
|
data4 0x983ED687,0xB7E5CFA1,0x00003FDB,0x3F7FCC08
|
|
data4 0xE3C3930B,0xDBE23B16,0x0000BFD9,0x3F7FC408
|
|
data4 0x48AA4DFC,0x9B92F1FC,0x0000BFDC,0x3F7FBC10
|
|
data4 0xCE9C8F7E,0x9A8CEB15,0x0000BFD9,0x3F7FB410
|
|
data4 0x0DECE74A,0x8C220879,0x00003FDC,0x3F7FAC18
|
|
data4 0x2F053150,0xB25CA912,0x0000BFDA,0x3F7FA420
|
|
data4 0xD9A5BE20,0xA5876555,0x00003FDB,0x3F7F9C20
|
|
data4 0x2053F087,0xC919BB6E,0x00003FD9,0x3F7F9428
|
|
data4 0x041E9A77,0xB70BDA79,0x00003FDC,0x3F7F8C30
|
|
data4 0xEA1C9C30,0xF18A5C08,0x00003FDA,0x3F7F8438
|
|
data4 0x796D89E5,0xA3790D84,0x0000BFDD,0x3F7F7C40
|
|
data4 0xA2915A3A,0xE1852369,0x0000BFDD,0x3F7F7448
|
|
data4 0xA39ED868,0xD803858F,0x00003FDC,0x3F7F6C50
|
|
data4 0x9417EBB7,0xB2EEE356,0x0000BFDD,0x3F7F6458
|
|
data4 0x9BB0D07F,0xED5C1F8A,0x0000BFDC,0x3F7F5C68
|
|
data4 0xE87C740A,0xD6D201A0,0x0000BFDD,0x3F7F5470
|
|
data4 0x1CA74025,0xE8DEBF5E,0x00003FDC,0x3F7F4C78
|
|
data4 0x1F34A7EB,0x9A995A97,0x0000BFDC,0x3F7F4488
|
|
data4 0x359EED97,0x9CB0F742,0x0000BFDA,0x3F7F3C90
|
|
data4 0xBBC6A1C8,0xD6F833C2,0x0000BFDD,0x3F7F34A0
|
|
data4 0xE71090EC,0xE1F68F2A,0x00003FDC,0x3F7F2CA8
|
|
data4 0xC160A74F,0xD1881CF1,0x0000BFDB,0x3F7F24B8
|
|
data4 0xD78CB5A4,0x9AD05AE2,0x00003FD6,0x3F7F1CC8
|
|
data4 0x9A77DC4B,0xE658CB8E,0x0000BFDD,0x3F7F14D8
|
|
data4 0x6BD6D312,0xBA281296,0x00003FDC,0x3F7F0CE0
|
|
data4 0xF95210D0,0xB478BBEB,0x0000BFDB,0x3F7F04F0
|
|
data4 0x38800100,0x39400480,0x39A00640,0x39E00C41 // H's start here
|
|
data4 0x3A100A21,0x3A300F22,0x3A4FF51C,0x3A6FFC1D
|
|
data4 0x3A87F20B,0x3A97F68B,0x3AA7EB86,0x3AB7E101
|
|
data4 0x3AC7E701,0x3AD7DD7B,0x3AE7D474,0x3AF7CBED
|
|
data4 0x3B03E1F3,0x3B0BDE2F,0x3B13DAAA,0x3B1BD766
|
|
data4 0x3B23CC5C,0x3B2BC997,0x3B33C711,0x3B3BBCC6
|
|
data4 0x3B43BAC0,0x3B4BB0F4,0x3B53AF6D,0x3B5BA620
|
|
data4 0x3B639D12,0x3B6B9444,0x3B7393BC,0x3B7B8B6D
|
|
LOCAL_OBJECT_END(Constants_Tgammal_log_80_h3_G_H)
|
|
|
|
.align 64
|
|
LOCAL_OBJECT_START(Constants_Tgammal_stirling)
|
|
//0.5*ln(2*Pi)=9.1893853320467266954096885e-01 + 7.2239360881843238220057778e-17
|
|
data8 0x3FED67F1C864BEB4, 0x3C94D252F2400510
|
|
// Bernulli numbers
|
|
data8 0xAAAAAAAAAAAAAAAB, 0x00003FFB //B2 = 8.3333333333333333333333333333e-02
|
|
data8 0xBF66C16C16C16C17 //B4 = -2.7777777777777777777777777778e-03
|
|
data8 0x3F4A01A01A01A01A //B6 = 7.9365079365079365079365079365e-04
|
|
data8 0xBF43813813813814 //B8 = -5.9523809523809523809523809524e-04
|
|
data8 0x3F4B951E2B18FF23 //B10 = 8.4175084175084175084175084175e-04
|
|
data8 0xBF5F6AB0D9993C7D //B12 = -1.9175269175269175269175269175e-03
|
|
data8 0x3F7A41A41A41A41A //B14 = 6.4102564102564102564102564103e-03
|
|
data8 0xBF9E4286CB0F5398 //B16 = -2.9550653594771241830065359477e-02
|
|
data8 0x3FC6FE96381E0680 //B18 = 1.7964437236883057316493849002e-01
|
|
data8 0x3FE0000000000000 // 0.5
|
|
LOCAL_OBJECT_END(Constants_Tgammal_stirling)
|
|
|
|
.align 64
|
|
LOCAL_OBJECT_START(Constants_Tgammal_sin)
|
|
// Polynomial coefficients for the sin(Pi*x)/Pi, 0 <= |x| < 0.5
|
|
//A2 = 8.1174242528335360802316245099e-01 + 5.1302254650266899774269946201e-18
|
|
data8 0x3FE9F9CB402BC46C, 0x3C57A8B3819B7CEC
|
|
//A1 = -1.6449340668482264060656916627e+00 + -3.0210280454695477893051351574e-17
|
|
data8 0xBFFA51A6625307D3, 0xBC816A402079D0EF
|
|
data8 0xF3AEF1FFCCE6C813, 0x0000BFE3 //A9 = -7.0921197799923779127089910470e-09
|
|
data8 0x87D54408E6D4BB9D, 0x00003FE9 //A8 = 2.5300880778252693946712766029e-07
|
|
data8 0xEA12033DCE7B8ED9, 0x0000BFED //A7 = -6.9758403885461690048189307819e-06
|
|
data8 0x9BA38C952A59D1A8, 0x00003FF2 //A6 = 1.4842878710882320255092707181e-04
|
|
data8 0x99C0B55178FF0E38, 0x0000BFF6 //A5 = -2.3460810348048124421268761990e-03
|
|
data8 0xD63402E798FEC896, 0x00003FF9 //A4 = 2.6147847817611456327417812320e-02
|
|
data8 0xC354723906D95E92, 0x0000BFFC //A3 = -1.9075182412208257558294507774e-01
|
|
LOCAL_OBJECT_END(Constants_Tgammal_sin)
|
|
|
|
.align 64
|
|
LOCAL_OBJECT_START(Constants_Tgammal_exp_64_Arg)
|
|
data4 0x00000000,0xB17217F4,0x00003FF2,0x00000000 // L_hi = hi part log(2)/2^12
|
|
data4 0xF278ECE6,0xF473DE6A,0x00003FD4,0x00000000 // L_lo = lo part log(2)/2^12
|
|
LOCAL_OBJECT_END(Constants_Tgammal_exp_64_Arg)
|
|
|
|
LOCAL_OBJECT_START(Constants_Tgammal_exp_64_A)
|
|
data4 0xB1B736A0,0xAAAAAAAB,0x00003FFA,0x00000000 // A3
|
|
data4 0x90CD6327,0xAAAAAAAB,0x00003FFC,0x00000000 // A2
|
|
data4 0xFFFFFFFF,0xFFFFFFFF,0x00003FFD,0x00000000 // A1
|
|
LOCAL_OBJECT_END(Constants_Tgammal_exp_64_A)
|
|
|
|
LOCAL_OBJECT_START(Constants_Tgammal_exp_64_T1)
|
|
data4 0x3F800000,0x3F8164D2,0x3F82CD87,0x3F843A29
|
|
data4 0x3F85AAC3,0x3F871F62,0x3F88980F,0x3F8A14D5
|
|
data4 0x3F8B95C2,0x3F8D1ADF,0x3F8EA43A,0x3F9031DC
|
|
data4 0x3F91C3D3,0x3F935A2B,0x3F94F4F0,0x3F96942D
|
|
data4 0x3F9837F0,0x3F99E046,0x3F9B8D3A,0x3F9D3EDA
|
|
data4 0x3F9EF532,0x3FA0B051,0x3FA27043,0x3FA43516
|
|
data4 0x3FA5FED7,0x3FA7CD94,0x3FA9A15B,0x3FAB7A3A
|
|
data4 0x3FAD583F,0x3FAF3B79,0x3FB123F6,0x3FB311C4
|
|
data4 0x3FB504F3,0x3FB6FD92,0x3FB8FBAF,0x3FBAFF5B
|
|
data4 0x3FBD08A4,0x3FBF179A,0x3FC12C4D,0x3FC346CD
|
|
data4 0x3FC5672A,0x3FC78D75,0x3FC9B9BE,0x3FCBEC15
|
|
data4 0x3FCE248C,0x3FD06334,0x3FD2A81E,0x3FD4F35B
|
|
data4 0x3FD744FD,0x3FD99D16,0x3FDBFBB8,0x3FDE60F5
|
|
data4 0x3FE0CCDF,0x3FE33F89,0x3FE5B907,0x3FE8396A
|
|
data4 0x3FEAC0C7,0x3FED4F30,0x3FEFE4BA,0x3FF28177
|
|
data4 0x3FF5257D,0x3FF7D0DF,0x3FFA83B3,0x3FFD3E0C
|
|
LOCAL_OBJECT_END(Constants_Tgammal_exp_64_T1)
|
|
|
|
LOCAL_OBJECT_START(Constants_Tgammal_exp_64_T2)
|
|
data4 0x3F800000,0x3F80058C,0x3F800B18,0x3F8010A4
|
|
data4 0x3F801630,0x3F801BBD,0x3F80214A,0x3F8026D7
|
|
data4 0x3F802C64,0x3F8031F2,0x3F803780,0x3F803D0E
|
|
data4 0x3F80429C,0x3F80482B,0x3F804DB9,0x3F805349
|
|
data4 0x3F8058D8,0x3F805E67,0x3F8063F7,0x3F806987
|
|
data4 0x3F806F17,0x3F8074A8,0x3F807A39,0x3F807FCA
|
|
data4 0x3F80855B,0x3F808AEC,0x3F80907E,0x3F809610
|
|
data4 0x3F809BA2,0x3F80A135,0x3F80A6C7,0x3F80AC5A
|
|
data4 0x3F80B1ED,0x3F80B781,0x3F80BD14,0x3F80C2A8
|
|
data4 0x3F80C83C,0x3F80CDD1,0x3F80D365,0x3F80D8FA
|
|
data4 0x3F80DE8F,0x3F80E425,0x3F80E9BA,0x3F80EF50
|
|
data4 0x3F80F4E6,0x3F80FA7C,0x3F810013,0x3F8105AA
|
|
data4 0x3F810B41,0x3F8110D8,0x3F81166F,0x3F811C07
|
|
data4 0x3F81219F,0x3F812737,0x3F812CD0,0x3F813269
|
|
data4 0x3F813802,0x3F813D9B,0x3F814334,0x3F8148CE
|
|
data4 0x3F814E68,0x3F815402,0x3F81599C,0x3F815F37
|
|
LOCAL_OBJECT_END(Constants_Tgammal_exp_64_T2)
|
|
|
|
LOCAL_OBJECT_START(Constants_Tgammal_exp_64_W1)
|
|
data8 0x0000000000000000, 0xBE384454171EC4B4
|
|
data8 0xBE6947414AA72766, 0xBE5D32B6D42518F8
|
|
data8 0x3E68D96D3A319149, 0xBE68F4DA62415F36
|
|
data8 0xBE6DDA2FC9C86A3B, 0x3E6B2E50F49228FE
|
|
data8 0xBE49C0C21188B886, 0x3E64BFC21A4C2F1F
|
|
data8 0xBE6A2FBB2CB98B54, 0x3E5DC5DE9A55D329
|
|
data8 0x3E69649039A7AACE, 0x3E54728B5C66DBA5
|
|
data8 0xBE62B0DBBA1C7D7D, 0x3E576E0409F1AF5F
|
|
data8 0x3E6125001A0DD6A1, 0xBE66A419795FBDEF
|
|
data8 0xBE5CDE8CE1BD41FC, 0xBE621376EA54964F
|
|
data8 0x3E6370BE476E76EE, 0x3E390D1A3427EB92
|
|
data8 0x3E1336DE2BF82BF8, 0xBE5FF1CBD0F7BD9E
|
|
data8 0xBE60A3550CEB09DD, 0xBE5CA37E0980F30D
|
|
data8 0xBE5C541B4C082D25, 0xBE5BBECA3B467D29
|
|
data8 0xBE400D8AB9D946C5, 0xBE5E2A0807ED374A
|
|
data8 0xBE66CB28365C8B0A, 0x3E3AAD5BD3403BCA
|
|
data8 0x3E526055C7EA21E0, 0xBE442C75E72880D6
|
|
data8 0x3E58B2BB85222A43, 0xBE5AAB79522C42BF
|
|
data8 0xBE605CB4469DC2BC, 0xBE589FA7A48C40DC
|
|
data8 0xBE51C2141AA42614, 0xBE48D087C37293F4
|
|
data8 0x3E367A1CA2D673E0, 0xBE51BEBB114F7A38
|
|
data8 0xBE6348E5661A4B48, 0xBDF526431D3B9962
|
|
data8 0x3E3A3B5E35A78A53, 0xBE46C46C1CECD788
|
|
data8 0xBE60B7EC7857D689, 0xBE594D3DD14F1AD7
|
|
data8 0xBE4F9C304C9A8F60, 0xBE52187302DFF9D2
|
|
data8 0xBE5E4C8855E6D68F, 0xBE62140F667F3DC4
|
|
data8 0xBE36961B3BF88747, 0x3E602861C96EC6AA
|
|
data8 0xBE3B5151D57FD718, 0x3E561CD0FC4A627B
|
|
data8 0xBE3A5217CA913FEA, 0x3E40A3CC9A5D193A
|
|
data8 0xBE5AB71310A9C312, 0x3E4FDADBC5F57719
|
|
data8 0x3E361428DBDF59D5, 0x3E5DB5DB61B4180D
|
|
data8 0xBE42AD5F7408D856, 0x3E2A314831B2B707
|
|
LOCAL_OBJECT_END(Constants_Tgammal_exp_64_W1)
|
|
|
|
LOCAL_OBJECT_START(Constants_Tgammal_exp_64_W2)
|
|
data8 0x0000000000000000, 0xBE641F2537A3D7A2
|
|
data8 0xBE68DD57AD028C40, 0xBE5C77D8F212B1B6
|
|
data8 0x3E57878F1BA5B070, 0xBE55A36A2ECAE6FE
|
|
data8 0xBE620608569DFA3B, 0xBE53B50EA6D300A3
|
|
data8 0x3E5B5EF2223F8F2C, 0xBE56A0D9D6DE0DF4
|
|
data8 0xBE64EEF3EAE28F51, 0xBE5E5AE2367EA80B
|
|
data8 0x3E47CB1A5FCBC02D, 0xBE656BA09BDAFEB7
|
|
data8 0x3E6E70C6805AFEE7, 0xBE6E0509A3415EBA
|
|
data8 0xBE56856B49BFF529, 0x3E66DD3300508651
|
|
data8 0x3E51165FC114BC13, 0x3E53333DC453290F
|
|
data8 0x3E6A072B05539FDA, 0xBE47CD877C0A7696
|
|
data8 0xBE668BF4EB05C6D9, 0xBE67C3E36AE86C93
|
|
data8 0xBE533904D0B3E84B, 0x3E63E8D9556B53CE
|
|
data8 0x3E212C8963A98DC8, 0xBE33138F032A7A22
|
|
data8 0x3E530FA9BC584008, 0xBE6ADF82CCB93C97
|
|
data8 0x3E5F91138370EA39, 0x3E5443A4FB6A05D8
|
|
data8 0x3E63DACD181FEE7A, 0xBE62B29DF0F67DEC
|
|
data8 0x3E65C4833DDE6307, 0x3E5BF030D40A24C1
|
|
data8 0x3E658B8F14E437BE, 0xBE631C29ED98B6C7
|
|
data8 0x3E6335D204CF7C71, 0x3E529EEDE954A79D
|
|
data8 0x3E5D9257F64A2FB8, 0xBE6BED1B854ED06C
|
|
data8 0x3E5096F6D71405CB, 0xBE3D4893ACB9FDF5
|
|
data8 0xBDFEB15801B68349, 0x3E628D35C6A463B9
|
|
data8 0xBE559725ADE45917, 0xBE68C29C042FC476
|
|
data8 0xBE67593B01E511FA, 0xBE4A4313398801ED
|
|
data8 0x3E699571DA7C3300, 0x3E5349BE08062A9E
|
|
data8 0x3E5229C4755BB28E, 0x3E67E42677A1F80D
|
|
data8 0xBE52B33F6B69C352, 0xBE6B3550084DA57F
|
|
data8 0xBE6DB03FD1D09A20, 0xBE60CBC42161B2C1
|
|
data8 0x3E56ED9C78A2B771, 0xBE508E319D0FA795
|
|
data8 0xBE59482AFD1A54E9, 0xBE2A17CEB07FD23E
|
|
data8 0x3E68BF5C17365712, 0x3E3956F9B3785569
|
|
LOCAL_OBJECT_END(Constants_Tgammal_exp_64_W2)
|
|
|
|
|
|
|
|
LOCAL_OBJECT_START(Constants_Tgammal_poly)
|
|
|
|
// Polynomial coefficients for the tgammal(x), 2 <= |x| < 3
|
|
//A5 = 2.8360780594841213109180699803e-02 + 2.2504152891014320704380000000e-19
|
|
data8 0x3F9D0A9BC49353D2, 0x3C109AEA0F23CE2D
|
|
//A4 = 1.0967323400216015538699565468e-01 + 9.9225166000430644587276000000e-18
|
|
data8 0x3FBC138B89492C5B, 0x3C66E138506D5652
|
|
//A3 = 2.5387124684114281691904579930e-01 + 2.2667777637607113205546600000e-17
|
|
data8 0x3FD03F6D2FA4F4F8, 0x3C7A2258DA8CD8B1
|
|
data8 0xC5866457328BC39B, 0x00003FE3 //A20 = 5.7487331964156762795056629138e-09
|
|
data8 0xE93D9F1ACD59C929, 0x0000BFE4 //A19= -1.3576396100397317396956445658e-08
|
|
data8 0xE33389C8F6CBA813, 0x00003FE5 //A18 = 2.6449714924964597501721434271e-08
|
|
data8 0x8FE7B25B9CD26D2A, 0x0000BFE7 //A17= -6.7011017946055513660266853311e-08
|
|
data8 0xB89F4721BFBC15B0, 0x00003FE8 //A16 = 1.7194280320370423615174419192e-07
|
|
data8 0xE49CBDC1874EBABA, 0x0000BFE9 //A15= -4.2582353660153782928729466776e-07
|
|
data8 0x913AF50A336129CA, 0x00003FEB //A14 = 1.0820500665257088283172211622e-06
|
|
data8 0xABCF0F7313B3B332, 0x0000BFEC //A13= -2.5601510627710417669568115706e-06
|
|
//A2 = 6.5455857798133676439533701341e-01 + 1.3292075193155190798867000000e-18
|
|
data8 0x3FE4F224D4B7E01C, 0x3C3885014A2B8319
|
|
//A1 = 9.3473452162608550164435428087e-01 + 3.2785154201417136611642400000e-17
|
|
data8 0x3FEDE9585F1A7093, 0x3C82E63C1B5028BF
|
|
//A0 = 1.3293403881791368004172682049e+00 + 2.2005689328949279282607500000e-16
|
|
data8 0x3FF544FA6D47B38F, 0x3CAFB6AA9829E81F
|
|
data8 0xF3668F799997C76D, 0x00003FED //A12 = 7.2539039479124273660331538367e-06
|
|
data8 0xD6C6BBD54CDEAEB1, 0x0000BFEE //A11= -1.2801665282681088568639378920e-05
|
|
data8 0x809E4763B06F6883, 0x00003FF1 //A10 = 6.1329973609906572700697893187e-05
|
|
data8 0x8443B000F8F9A71A, 0x00003FED //A9 = 3.9417864189995544394564413428e-06
|
|
data8 0xC5C7E6D62A6991D8, 0x00003FF4 //A8 = 7.5447412886334708803357581519e-04
|
|
data8 0xD2AF690725C62D88, 0x00003FF5 //A7 = 1.6074004848394703022110823298e-03
|
|
data8 0xAA44E635D4B7B682, 0x00003FF8 //A6 = 1.0392403425906843901680697839e-02
|
|
//
|
|
// Polynomial coefficients for the tgammal(x), 4 <= |x| < 5
|
|
//A5 = 1.1600674810589555185913468449e+00 + 3.0229979112715124660731000000e-17
|
|
data8 0x3FF28FA2EB44D22E, 0x3C816D285234C815
|
|
//A4 = 3.1374268565470946334983182169e+00 + 1.3694868953995008497659600000e-16
|
|
data8 0x400919734073B1E1, 0x3CA3BC83CD7E9565
|
|
//A3 = 7.0834593993741057360580271052e+00 + 3.3899702569039156457249800000e-16
|
|
data8 0x401C5576617B6C1F, 0x3CB86D6431213296
|
|
data8 0xA4A5FB49C094966B, 0x00003FDA //A20 = 9.3591760106637809309720130828e-12
|
|
data8 0xA9260DA0F51D7ED8, 0x00003FDD //A19 = 7.6919898428091669411809372180e-11
|
|
data8 0xA16441DFB14BD6E1, 0x00003FE0 //A18 = 5.8713933014370867331213494535e-10
|
|
data8 0x95F098D9C2234849, 0x00003FE3 //A17 = 4.3638234584169302324461091035e-09
|
|
data8 0x8581817400E5AD2B, 0x00003FE6 //A16 = 3.1084260332429955234755367839e-08
|
|
data8 0xE272940E373EBE15, 0x00003FE8 //A15 = 2.1089573544273993580820317236e-07
|
|
data8 0xB6B3391145D226FB, 0x00003FEB //A14 = 1.3612217421122787182942706259e-06
|
|
data8 0x8B9428C4DF95FCD5, 0x00003FEE //A13 = 8.3195416382628990683949003789e-06
|
|
//A2 = 1.2665135075272345943631080445e+01 + 9.8721896915973874255877000000e-16
|
|
data8 0x4029548C95A76F38, 0x3CD1C8BE715B8E13
|
|
//A1 = 1.6154969393303069580269948347e+01 + 9.6850518810678379641029000000e-16
|
|
data8 0x403027AC12FC1E1E, 0x3CD172711C15501B
|
|
//A0 = 1.1631728396567448058362970187e+01 + 8.7078125362814179268673000000e-16
|
|
data8 0x40274371E7866C65, 0x3CCF5F8A1A5FACA0
|
|
data8 0xC94A903114272C03, 0x00003FF0 //A12 = 4.7991576836334427243159066630e-05
|
|
data8 0x8844262960E04BE6, 0x00003FF3 //A11 = 2.5990716419283017929486175141e-04
|
|
data8 0xAC5418A76767678D, 0x00003FF5 //A10 = 1.3147621245497801180184809726e-03
|
|
data8 0xCA231B6EFE959132, 0x00003FF7 //A9 = 6.1687358811367989146517222415e-03
|
|
data8 0xDA38E39C13819D2A, 0x00003FF9 //A8 = 2.6638454961912040754759086920e-02
|
|
data8 0xD696DF8D8389FE53, 0x00003FFB //A7 = 1.0477995539298934056097943975e-01
|
|
data8 0xBDD5C153048BC435, 0x00003FFD //A6 = 3.7077144754791605130056406006e-01
|
|
//
|
|
// Polynomial coefficients for the tgammal(x), 6 <= |x| < 7
|
|
//A5 = 6.7169398121054200601065531373e+01 + 2.9481001527213915901489600000e-15
|
|
data8 0x4050CAD76B377BA0, 0x3CEA8DDB2B2DE93E
|
|
//A4 = 1.6115104376855398982115730178e+02 + 1.3422421925418824418257300000e-14
|
|
data8 0x406424D559BDC687, 0x3D0E397FDB5B33DC
|
|
//A3 = 3.1812194028053562533386866562e+02 + 3.9881709875858650942409600000e-14
|
|
data8 0x4073E1F377A6CF73, 0x3D26738F63FE9C4C
|
|
data8 0xD6E1B5FF90CAABD3, 0x00003FE1 //A20 = 1.5634700199277480081025480635e-09
|
|
data8 0xD451987B925DD37E, 0x00003FE4 //A19 = 1.2358576813211397717382327174e-08
|
|
data8 0xBFC151B67FA58E6B, 0x00003FE7 //A18 = 8.9292951435632759686382657901e-08
|
|
data8 0xA9034C5E1D67572E, 0x00003FEA //A17 = 6.2962205718327848327368724720e-07
|
|
data8 0x8E40F6EAA30A71EC, 0x00003FED //A16 = 4.2394926442967995119170095258e-06
|
|
data8 0xE3C3541B03A1C350, 0x00003FEF //A15 = 2.7151465666109594512258841637e-05
|
|
data8 0xACE2E58436B2DDCE, 0x00003FF2 //A14 = 1.6487723793339152877117376243e-04
|
|
data8 0xF7EAF8D8D1CAA3D1, 0x00003FF4 //A13 = 9.4573158112768812533636022369e-04
|
|
//A2 = 4.8664351544258869353143381886e+02 + 4.7424047995944376868895400000e-14
|
|
data8 0x407E6A4BD6D9463B, 0x3D2AB2868D79E192
|
|
//A1 = 5.1615277644992545447166776285e+02 + 3.0901956935588717379242200000e-14
|
|
data8 0x40802138E2DC003B, 0x3D216570FB601AEA
|
|
//A0 = 2.8788527781504433278314536437e+02 + 2.8213174117085164944959600000e-14
|
|
data8 0x4071FE2A1911F7D6, 0x3D1FC3E4CF4DB5AF
|
|
data8 0xA72B88E48D3D1BAB, 0x00003FF7 //A12 = 5.1016252919939028020562237471e-03
|
|
data8 0xD2EFB1067DB4FFB2, 0x00003FF9 //A11 = 2.5749059441230515023024615917e-02
|
|
data8 0xF788AF9522205C24, 0x00003FFB //A10 = 1.2086617635601742290221382521e-01
|
|
data8 0x861A6CE06CB29EAF, 0x00003FFE //A9 = 5.2384071807018493367136112163e-01
|
|
data8 0x84FBDE0947718B58, 0x00004000 //A8 = 2.0778727617851237754568261869e+00
|
|
data8 0xEEC1371E265A2C3A, 0x00004001 //A7 = 7.4610858525146049022238037342e+00
|
|
data8 0xBF514B9BE68ED59D, 0x00004003 //A6 = 2.3914694993947572859629197920e+01
|
|
//
|
|
// Polynomial coefficients for the tgammal(x), 8 <= |x| < 9
|
|
//A5 = 5.8487447114416836484451778233e+03 + 4.7365465221455983144182900000e-13
|
|
data8 0x40B6D8BEA568B6FD, 0x3D60AA4D44C2589B
|
|
//A4 = 1.2796464063087094473303295672e+04 + 1.2373341702514898266244200000e-12
|
|
data8 0x40C8FE3B666B532D, 0x3D75C4752C5B4783
|
|
//A3 = 2.2837606581322281272150576115e+04 + 2.6598064610627891398831000000e-13
|
|
data8 0x40D64D66D23A7764, 0x3D52B77B3A10EA5C
|
|
data8 0xB23418F75B0BE22A, 0x00003FE9 //A20 = 3.3192989594206801808678663868e-07
|
|
data8 0xA984A7BC8B856ED2, 0x00003FEC //A19 = 2.5260177918662350066375115788e-06
|
|
data8 0x921A49729416372C, 0x00003FEF //A18 = 1.7416797068239475136398213598e-05
|
|
data8 0xF5BB9415CC399CA4, 0x00003FF1 //A17 = 1.1717449586392814601938207599e-04
|
|
data8 0xC50B91A40B81F9DF, 0x00003FF4 //A16 = 7.5166775151159345732094429036e-04
|
|
data8 0x96002572326DB203, 0x00003FF7 //A15 = 4.5776541559407384162139204300e-03
|
|
data8 0xD81A1A595E4157BA, 0x00003FF9 //A14 = 2.6379634345126284099420760736e-02
|
|
data8 0x92B700D0CFECADD8, 0x00003FFC //A13 = 1.4327622675407940907282658100e-01
|
|
//A2 = 3.1237895525940199149772524834e+04 + 3.1280450505163186432331700000e-12
|
|
data8 0x40DE8179504C0878, 0x3D8B83BB33FBB766
|
|
//A1 = 2.9192841741344487672904506326e+04 + 7.9300780509779689630767000000e-13
|
|
data8 0x40DC8235DF171691, 0x3D6BE6C780EE54DF
|
|
//A0 = 1.4034407293483411194756627083e+04 + 1.4038139346291543309253700000e-12
|
|
data8 0x40CB693422315F90, 0x3D78B23746113FCE
|
|
data8 0xBAE50807548BC711, 0x00003FFE //A12 = 7.3005724123917935346868107005e-01
|
|
data8 0xDE28B1F57E68CFB6, 0x00004000 //A11 = 3.4712338349724065462763671443e+00
|
|
data8 0xF4DCA5A5FF901118, 0x00004002 //A10 = 1.5303868912154033908205911714e+01
|
|
data8 0xF85AAA1AD5E84E5E, 0x00004004 //A9 = 6.2088539523416399361048051373e+01
|
|
data8 0xE5AA8BB1BF02934D, 0x00004006 //A8 = 2.2966619406617480799195651466e+02
|
|
data8 0xBF6CFEFD67F59845, 0x00004008 //A7 = 7.6570306334640770654588802417e+02
|
|
data8 0x8DB5D2F001635C29, 0x0000400A //A6 = 2.2673639984182571062068713002e+03
|
|
//
|
|
// Polynomial coefficients for the tgammal(x), 10 <= |x| < 11
|
|
//A5 = 7.2546009516580589115619659424e+05 + 1.0343348865365065212891728822e-10
|
|
data8 0x412623A830B99290, 0x3DDC6E7C157611C4
|
|
//A4 = 1.4756292870840241666883230209e+06 + 8.1516565365333844166705674775e-11
|
|
data8 0x4136842D497E56AF, 0x3DD66837E4C3F9EE
|
|
//A3 = 2.4356116926500420086085796356e+06 + 3.5508860076560925641351069404e-10
|
|
data8 0x4142950DD8A8C1AF, 0x3DF866C8E3DD0980
|
|
data8 0xB7FD0D1EEAC38EB4, 0x00003FF1 //A20 = 8.7732544640091602721643775932e-05
|
|
data8 0xA9345C64AC750AE9, 0x00003FF4 //A19 = 6.4546407626804942279126469603e-04
|
|
data8 0x8BEABC81BE1E93C9, 0x00003FF7 //A18 = 4.2699261134524096128048819443e-03
|
|
data8 0xE1CD281EDD7315F8, 0x00003FF9 //A17 = 2.7563646660310313164706189622e-02
|
|
data8 0xAD8A5BA6D0FD9758, 0x00003FFC //A16 = 1.6947310643831556048460963841e-01
|
|
data8 0xFCDDA464AD3F182E, 0x00003FFE //A15 = 9.8775699098518676937088606052e-01
|
|
data8 0xAE0DCE2F7B60D1AE, 0x00004001 //A14 = 5.4391852309591064073782104822e+00
|
|
data8 0xE1745D9ABEB8D1A7, 0x00004003 //A13 = 2.8181819161363002758615770457e+01
|
|
//A2 = 3.0619656223573554307222366333e+06 + 1.0819940302945474471259520006e-10
|
|
data8 0x41475C66CFA967E4, 0x3DDDBDDB2A27334B
|
|
//A1 = 2.6099413018962685018777847290e+06 + 3.6851882860056025385268615240e-10
|
|
data8 0x4143E98AA6A48974, 0x3DF9530D42589AB6
|
|
//A0 = 1.1332783889487853739410638809e+06 + 1.9339350553312096248591829758e-10
|
|
data8 0x41314ADE639225C9, 0x3DEA946DD6C2C8D3
|
|
data8 0x88BCFAAE71812A1C, 0x00004006 //A12 = 1.3673820009490115307300592012e+02
|
|
data8 0x9A770F5AB540A326, 0x00004008 //A11 = 6.1786031215382040427126476507e+02
|
|
data8 0xA170C1D2C6B413FC, 0x0000400A //A10 = 2.5830473201524594051391525170e+03
|
|
data8 0x9AE56061CB02EB55, 0x0000400C //A9 = 9.9133441230507404119297200255e+03
|
|
data8 0x872390769650FBE2, 0x0000400E //A8 = 3.4595564309496661629764193479e+04
|
|
data8 0xD3E5E8D6923910C1, 0x0000400F //A7 = 1.0849181904819284819615140521e+05
|
|
data8 0x930D70602F50B754, 0x00004011 //A6 = 3.0116351174131169193070583741e+05
|
|
//
|
|
// Polynomial coefficients for the tgammal(x), 12 <= |x| < 13
|
|
//A5 = 1.2249876249976964294910430908e+08 + 6.0051348061679753770848000000e-09
|
|
data8 0x419D34BB29FFC39D, 0x3E39CAB72E01818D
|
|
//A4 = 2.3482765927605420351028442383e+08 + 1.1874729051592862323641700000e-08
|
|
data8 0x41ABFE5F168D56FA, 0x3E4980338AA7B04B
|
|
//A3 = 3.6407329688125067949295043945e+08 + 2.6657200942150363994658700000e-08
|
|
data8 0x41B5B35150E199A5, 0x3E5C9F79C0EB5300
|
|
data8 0xE89AE0F8D726329D, 0x00003FF9 //A20 = 2.8394164465429105626588451540e-02
|
|
data8 0xCF90981F86E38013, 0x00003FFC //A19 = 2.0270002071785908652476845915e-01
|
|
data8 0xA56C658079CA8C4A, 0x00003FFF //A18 = 1.2923704984019263122675412350e+00
|
|
data8 0x80AEF96A67C5615A, 0x00004002 //A17 = 8.0427183300456238315262463506e+00
|
|
data8 0xBE886D7529678931, 0x00004004 //A16 = 4.7633230047847868242503413461e+01
|
|
data8 0x858EDBA4CE2F7508, 0x00004007 //A15 = 2.6711607799594541057655957154e+02
|
|
data8 0xB0B0A3AF388274F0, 0x00004009 //A14 = 1.4135199810126975119809102782e+03
|
|
data8 0xDBA87137988751EF, 0x0000400B //A13 = 7.0290552818218513870879313985e+03
|
|
//A2 = 4.2828433593031734228134155273e+08 + 3.9760422293645854535247300000e-08
|
|
data8 0x41B98719AFEE2947, 0x3E6558A17E0D3007
|
|
//A1 = 3.4008253676084774732589721680e+08 + 1.2558352335001093116071000000e-09
|
|
data8 0x41B4453F68C2C6EB, 0x3E159338C5BC7EC3
|
|
//A0 = 1.3684336546556583046913146973e+08 + 2.6786516700381562934240300000e-08
|
|
data8 0x41A05020CAEE5EA5, 0x3E5CC3058A858579
|
|
data8 0xFF5E3940FB4BA576, 0x0000400D //A12 = 3.2687111823895439312116108631e+04
|
|
data8 0x8A08C124C7F74B6C, 0x00004010 //A11 = 1.4134701786994123329786229006e+05
|
|
data8 0x89D701953540BFFB, 0x00004012 //A10 = 5.6459209892773907605385652281e+05
|
|
data8 0xFC46344B3116C3AD, 0x00004013 //A9 = 2.0666305367147234406757715163e+06
|
|
data8 0xD183EBD7A400151F, 0x00004015 //A8 = 6.8653979211730981618367536737e+06
|
|
data8 0x9C083A40742112F4, 0x00004017 //A7 = 2.0451444503543981795037456447e+07
|
|
data8 0xCD3C475B1A8B6662, 0x00004018 //A6 = 5.3801245423495149598177886823e+07
|
|
LOCAL_OBJECT_END(Constants_Tgammal_poly)
|
|
|
|
|
|
LOCAL_OBJECT_START(Constants_Tgammal_poly_splitted)
|
|
|
|
// Polynomial coefficients for the tgammal(x), 1 <= |x| < 1.25
|
|
//A5 = -9.8199506890310417350775651357e-01+ -3.2546247786122976510752200000e-17
|
|
data8 0xBFEF6C80EC38B509, 0xBC82C2FA7A3DE3BD
|
|
//A4 = 9.8172808683439960475425323239e-01 + 4.4847611775298520359811400000e-17
|
|
data8 0x3FEF6A51055096B0, 0x3C89DA56DE95EFE4
|
|
//A3 = -9.0747907608088618225394839101e-01 +-1.0244057366544064435443970000e-16
|
|
data8 0xBFED0A118F324B62, 0xBC9D86C7B9EBCFFF
|
|
data8 0xB8E3FDAA66CC738E, 0x00003FFB //A20 = 9.0278608095877488976217714815e-02
|
|
data8 0xA76067AE1738699C, 0x0000BFFD //A19 =-3.2690738678103132837070881737e-01
|
|
data8 0x9D66B13718408C44, 0x00003FFE //A18 = 6.1484820933424283818320582920e-01
|
|
data8 0xD4AC67BBB4AE5599, 0x0000BFFE //A17 =-8.3075569470082063491389474937e-01
|
|
data8 0xF1426ED1C1488DB3, 0x00003FFE //A16 = 9.4241993542644505594957058785e-01
|
|
data8 0xFC12EB07AA6F4B6B, 0x0000BFFE //A15 =-9.8466366707947121954333549690e-01
|
|
data8 0xFF2B32CFE5B0DDC8, 0x00003FFE //A14 = 9.9675290656677214804168895915e-01
|
|
data8 0xFFD8E7E6FF3662EA, 0x0000BFFE //A13 =-9.9940347089360552383472582319e-01
|
|
//A2 = 9.8905599532797250361682017683e-01 + 5.1760162410376024240867300000e-17
|
|
data8 0x3FEFA658C23B1578, 0x3C8DD673A61F6FE7
|
|
//A1 = -5.7721566490153275452712478000e-01+ -1.0607935612223465065923310000e-16
|
|
data8 0xBFE2788CFC6FB618, 0xBC9E9346622D53B7
|
|
//A0 = 9.9999999999999988897769753748e-01 + 1.1102230245372554544790880000e-16
|
|
data8 0x3FEFFFFFFFFFFFFF, 0x3C9FFFFFFFF51E4E
|
|
data8 0xFFF360DF628F0BC9, 0x00003FFE //A12 = 9.9980740979895815468216470840e-01
|
|
data8 0xFFEF8F9A72B40480, 0x0000BFFE //A11 = -9.9974916001038145045939523470e-01
|
|
data8 0xFFE037B8C7E39952, 0x00003FFE //A10 = 9.9951504002809911822597567307e-01
|
|
data8 0xFFC01E08F348BED2, 0x0000BFFE //A9 = -9.9902522772325406705059517941e-01
|
|
data8 0xFF83DAC83119B52C, 0x00003FFE //A8 = 9.9810569179053383842734164901e-01
|
|
data8 0xFEF9F8AB891ABB24, 0x0000BFFE //A7 = -9.9600176036720260345608796766e-01
|
|
data8 0xFE3F0537573C8235, 0x00003FFE //A6 = 9.9314911461918778676646301341e-01
|
|
//
|
|
// Polynomial coefficients for the tgammal(x), 1.25 <= |x| < 1.75
|
|
//A5 = -7.7523052299853054125655660300e-02+ -1.2693512521686721504433600000e-17
|
|
data8 0xBFB3D88CFE50601B, 0xBC6D44ED60EE2170
|
|
//A4 = 1.4464535904462152982041800442e-01 + 2.5426820829345729856648800000e-17
|
|
data8 0x3FC283BD374EB2A9, 0x3C7D50AC436187C3
|
|
//A3 = -1.0729480456477220873257039102e-01+ -6.2429894945456418196551000000e-18
|
|
data8 0xBFBB77AC1CA2EBA5, 0xBC5CCA6BCC422D41
|
|
data8 0xF732D2689F323283, 0x00003FF2 //A20 = 2.3574688251652899567587145422e-04
|
|
data8 0xB6B00E23DE89D13A, 0x0000BFF3 //A19 =-3.4844916488842618776630058875e-04
|
|
data8 0xE98396FE4A1B2799, 0x00003FF3 //A18 =4.4539265198744452020440735977e-04
|
|
data8 0xAF8D235A640DB1A2, 0x0000BFF4 //A17 =-6.6967514303333563295261178346e-04
|
|
data8 0x8513B736C918B261, 0x00003FF5 //A16 = 1.0152970456990865810615917715e-03
|
|
data8 0xC790A1A2C78D8E17, 0x0000BFF5 //A15 =-1.5225598630329403515321688394e-03
|
|
data8 0x959706CFA638CDE2, 0x00003FF6 //A14 = 2.2825614575133879623648932383e-03
|
|
data8 0xE050A6021E129860, 0x0000BFF6 //A13 =-3.4227757733947066666295285936e-03
|
|
//A2 = 4.1481345368830113695679528973e-01 + 3.1252439808354284892632100000e-17
|
|
data8 0x3FDA8C4DBA620D56, 0x3C82040BCB483C76
|
|
//A1 = 3.2338397448885010387886751460e-02 + 3.4437825798552300531443100000e-18
|
|
data8 0x3FA08EA88EE561B1, 0x3C4FC366D6C64806
|
|
//A0 = 8.8622692545275794095971377828e-01 + 7.2689375867553992399219000000e-17
|
|
data8 0x3FEC5BF891B4EF6A, 0x3C94F3877D311C0C
|
|
data8 0xA8275AADC09D16FC, 0x00003FF7 //A12 = 5.1316445128621071486146117136e-03
|
|
data8 0xFBFE2CE9215267A2, 0x0000BFF7 //A11= -7.6902121820788373000579382408e-03
|
|
data8 0xBCC8EEAB67ECD91D, 0x00003FF8 //A10 = 1.1522515369164312742737727262e-02
|
|
data8 0x8D1614BB97E5E8C2, 0x0000BFF9 //A9 = -1.7222443097804730395560633583e-02
|
|
data8 0xD3A963578BE291E3, 0x00003FF9 //A8 = 2.5837606456090186343624210891e-02
|
|
data8 0x9BA7EAE64C42FDF7, 0x0000BFFA //A7 = -3.8001935555045161419575037512e-02
|
|
data8 0xF0115BA1A77607E7, 0x00003FFA //A6 = 5.8610303817173477119764956736e-02
|
|
//
|
|
// Polynomial coefficients for the tgammal(x), 1.75 <= |x| < 2.0
|
|
//A5 = 2.6698206874501426502654943818e-04 + 3.4033756836921062797887300000e-20
|
|
data8 0x3F317F3740FE2A68, 0x3BE417093234B06E
|
|
//A4 = 7.4249010753513894345090307070e-02 + 3.9810018444482764697014200000e-18
|
|
data8 0x3FB301FBB0F25A92, 0x3C525BEFFABB622F
|
|
//A3 = -8.1576919247086265851720554565e-02+ -5.2716624487804746360745000000e-19
|
|
data8 0xBFB4E239984650AC, 0xBC2372F1C4F276FF
|
|
data8 0xFEF3AEE71038E9A3, 0x00003FEB //A20 = 1.8995395865421509009969188571e-06
|
|
data8 0xA11CFA2672BF876A, 0x0000BFEB //A19 =-1.2003868221414015771269244270e-06
|
|
data8 0xF8E107215DAE2164, 0x00003FEC //A18 = 3.7085863210303833432006027217e-06
|
|
data8 0xBCDDD3FC011EF7D6, 0x00003FEC //A17 = 2.8143303971756051015245433043e-06
|
|
data8 0x8683C4687FA22E68, 0x00003FEE //A16 = 8.0177018464360416764308252462e-06
|
|
data8 0xFDA09E5D33E32968, 0x00003FEE //A15 = 1.5117372062443781157389064848e-05
|
|
data8 0xFFB00D0CFF4089B4, 0x00003FEF //A14 = 3.0480348961227424242198174995e-05
|
|
data8 0xFEF6C39566785085, 0x00003FF0 //A13 = 6.0788135974125244644334004947e-05
|
|
//A2 = 4.1184033042643969357854416558e-01 + 1.2103396182129232634761000000e-18
|
|
data8 0x3FDA5B978B96BEBF, 0x3C3653AAD0A139E4
|
|
//A1 = -4.2278433509846713445057275749e-01+ -4.9429151528135657430413000000e-18
|
|
data8 0xBFDB0EE6072093CE, 0xBC56CB907027554F
|
|
//A0 = 1.0000000000000000000000000000e+00 + 1.0969171200000000000000000000e-31
|
|
data8 0x3FF0000000000000, 0x3981CC6A5B20B4D5
|
|
data8 0xFF2B7BA9A8D68C37, 0x00003FF1 //A12 = 1.2167446884801403650547161615e-04
|
|
data8 0xFCA53468E3692EF1, 0x00003FF2 //A11 = 2.4094136329542400976250900707e-04
|
|
data8 0x808D698A9C993615, 0x00003FF4 //A10 = 4.9038845704938303659791698883e-04
|
|
data8 0xF10F8E3FB8BB4AFB, 0x00003FF4 //A9 = 9.1957383840999861214472423976e-04
|
|
data8 0x89E224E42F93F005, 0x00003FF6 //A8 = 2.1039333407187324139473634747e-03
|
|
data8 0xBAF374824937A323, 0x00003FF6 //A7 = 2.8526458211545152218493600470e-03
|
|
data8 0xB6BF7564F52140C6, 0x00003FF8 //A6 = 1.1154045718131014476684982178e-02
|
|
//
|
|
// Polynomial coefficients for the tgammal(x), 0.0 <= |x| < 0.125
|
|
//A5 = -9.8199506890314514073736518185e-01+ -5.9363811993837985890950900000e-17
|
|
data8 0xBFEF6C80EC38B67A, 0xBC911C46B447C81F
|
|
//A4 = 9.8172808683440015986576554496e-01 + 2.7457414262802803699834200000e-17
|
|
data8 0x3FEF6A51055096B5, 0x3C7FA7FF90ACAD1F
|
|
//A3 = -9.0747907608088618225394839101e-01 + -1.0676255850934306734701780000e-16
|
|
data8 0xBFED0A118F324B62, 0xBC9EC5AFB633438D
|
|
data8 0x9217E83FA207CB80, 0x00003FFD //A20 = 2.8533864762086088781083621561e-01
|
|
data8 0xA8DABFA52FDF03EC, 0x0000BFFE //A19= -6.5958783896337186303285832783e-01
|
|
data8 0xE331ED293AF39F9B, 0x00003FFE //A18 = 8.8748056656454687449654731184e-01
|
|
data8 0xF9163C5DDB52419D, 0x0000BFFE //A17= -9.7299554149078295602977718525e-01
|
|
data8 0xFEC0A1C672CB9265, 0x00003FFE //A16 = 9.9512683005268190987854104489e-01
|
|
data8 0xFFD2D65B8EA7B5F4, 0x0000BFFE //A15= -9.9931087241443958201592847861e-01
|
|
data8 0xFFF93AA39EE53445, 0x00003FFE //A14 = 9.9989668364186884793382816496e-01
|
|
data8 0xFFFB99A9A3F5F480, 0x0000BFFE //A13= -9.9993286506283835663204999212e-01
|
|
//A2 = 9.8905599532797250361682017683e-01 + 5.1778575360788420716540100000e-17
|
|
data8 0x3FEFA658C23B1578, 0x3C8DD92B45408D07
|
|
//A1 = -5.7721566490153275452712478000e-01+ -1.0607938730998824663273110000e-16
|
|
data8 0xBFE2788CFC6FB618, 0xBC9E9346F8FDE55B
|
|
//A0 = 9.9999999999999988897769753748e-01 + 1.1102230246251564036631420000e-16
|
|
data8 0x3FEFFFFFFFFFFFFF, 0x3C9FFFFFFFFFFFFF
|
|
data8 0xFFF7FEBB545812C1, 0x00003FFE //A12 = 9.9987785409425126648628395084e-01
|
|
data8 0xFFF00C02E943A3F2, 0x0000BFFE //A11= -9.9975657530855116454438747397e-01
|
|
data8 0xFFE0420AADC53820, 0x00003FFE //A10 = 9.9951565514290485919027183699e-01
|
|
data8 0xFFC01EB42EF27EEB, 0x0000BFFE //A9 = -9.9902526759155739377365522320e-01
|
|
data8 0xFF83DAD0BF23FF12, 0x00003FFE //A8 = 9.9810569378236378800364235948e-01
|
|
data8 0xFEF9F8ABDBCDB2F3, 0x0000BFFE //A7 = -9.9600176044241699109053158187e-01
|
|
data8 0xFE3F05375988491D, 0x00003FFE //A6 = 9.9314911462127599008937257662e-01
|
|
LOCAL_OBJECT_END(Constants_Tgammal_poly_splitted)
|
|
|
|
.align 64
|
|
LOCAL_OBJECT_START(Constants_Tgammal_common)
|
|
// Positive overflow value
|
|
data8 0x3FE0000000000000 // 0.5
|
|
data8 0x3FF8000000000000 // 1.5
|
|
data8 0x3FD0000000000000 // 0.25
|
|
data8 0x0000000000000000 // 0
|
|
data8 0xDB718C066B352E21, 0x00004009 // Positive overflow value
|
|
LOCAL_OBJECT_END(Constants_Tgammal_common)
|
|
|
|
|
|
|
|
//=======================================================
|
|
// Lgamma registers
|
|
|
|
// General Purpose Registers
|
|
GR_l_Log_Table = r33
|
|
GR_l_Log_Table1 = r34
|
|
GR_l_BIAS = r34
|
|
GR_l_Index1 = r35
|
|
GR_l_Index2 = r36
|
|
GR_l_signif_Z = r37
|
|
GR_l_X_0 = r38
|
|
GR_l_X_1 = r39
|
|
GR_l_X_2 = r40
|
|
GR_l_Z_1 = r41
|
|
GR_l_Z_2 = r42
|
|
GR_l_N = r43
|
|
GR_l_Index3 = r44
|
|
GR_l_Stirling_Table = r45
|
|
GR_l_N_Unbiased = r46
|
|
|
|
// Floating Point Registers
|
|
FR_l_logl_X = f8
|
|
|
|
FR_l_h_3 = f10
|
|
FR_l_poly_hi = f10
|
|
FR_l_W = f11
|
|
FR_l_S = f12
|
|
FR_l_GS_hi = f13
|
|
FR_l_Y_lo = f13
|
|
FR_l_r_cor = f14
|
|
FR_l_G_1 = f15
|
|
FR_l_G = f15
|
|
FR_l_H_1 = f32
|
|
FR_l_H = f32
|
|
FR_l_h = f33
|
|
FR_l_h_1 = f33
|
|
FR_l_N = f33
|
|
FR_l_G_2 = f34
|
|
FR_l_H_2 = f35
|
|
FR_l_h_2 = f36
|
|
FR_l_G_3 = f37
|
|
FR_l_log2_hi = f38
|
|
FR_l_GS_lo = f39
|
|
FR_l_H_3 = f40
|
|
FR_l_float_N = f41
|
|
FR_l_Q_4 = f42
|
|
FR_l_Q_3 = f43
|
|
FR_l_Q_2 = f44
|
|
FR_l_Q_1 = f45
|
|
FR_l_Q_5 = f46
|
|
FR_l_Q_6 = f47
|
|
FR_l_log2_lo = f48
|
|
FR_l_r = f49
|
|
FR_l_poly_lo = f50
|
|
FR_l_poly = f51
|
|
FR_l_rsq = f52
|
|
FR_l_Y_lo_res = f53
|
|
|
|
FR_l_Y0 = f55
|
|
FR_l_Q0 = f56
|
|
FR_l_E0 = f57
|
|
FR_l_E2 = f58
|
|
FR_l_E1 = f59
|
|
FR_l_Y1 = f60
|
|
FR_l_E3 = f61
|
|
FR_l_Y2 = f62
|
|
|
|
FR_l_Z = f63
|
|
FR_l_Z2 = f64
|
|
FR_l_Z4 = f65
|
|
FR_l_Z8 = f66
|
|
|
|
FR_l_CH = f67
|
|
FR_l_CL = f68
|
|
|
|
FR_l_B2 = f69
|
|
FR_l_B4 = f70
|
|
FR_l_B6 = f71
|
|
FR_l_B8 = f72
|
|
FR_l_B10 = f73
|
|
FR_l_B12 = f74
|
|
FR_l_B14 = f75
|
|
FR_l_B16 = f76
|
|
FR_l_B18 = f77
|
|
FR_l_Half = f78
|
|
FR_l_SS = f79
|
|
FR_l_AbsX_m_Half = f80
|
|
FR_l_CXH = f81
|
|
FR_l_CXL = f82
|
|
FR_l_SSCXH = f83
|
|
FR_l_SSCXL = f84
|
|
FR_l_XYH = f85
|
|
FR_l_XYL = f86
|
|
FR_l_Temp = f87
|
|
|
|
FR_l_logl_YHi = f88
|
|
FR_l_logl_YLo = f89
|
|
|
|
FR_l_SignedXYH = f123
|
|
|
|
FR_l_AbsX = f127
|
|
|
|
|
|
|
|
//=======================================================
|
|
// Negative part registers
|
|
|
|
// General Purpose Registers
|
|
GR_n_sin_Table = r47
|
|
GR_n_XN = r48
|
|
|
|
// Float point registers
|
|
FR_n_IXNS = f125
|
|
FR_n_IXN = f126
|
|
|
|
FR_n_XNS = f90
|
|
FR_n_XS = f91
|
|
FR_n_XS2 = f92
|
|
FR_n_XS2L = f93
|
|
FR_n_XS4 = f94
|
|
FR_n_XS7 = f95
|
|
FR_n_XS8 = f96
|
|
FR_n_TT = f97
|
|
FR_n_TH = f98
|
|
FR_n_TL = f99
|
|
|
|
FR_n_A2H = f100
|
|
FR_n_A2L = f101
|
|
FR_n_A1H = f102
|
|
FR_n_A1L = f103
|
|
FR_n_A9 = f104
|
|
FR_n_A8 = f105
|
|
FR_n_A7 = f106
|
|
FR_n_A6 = f107
|
|
FR_n_A5 = f108
|
|
FR_n_A4 = f109
|
|
FR_n_A3 = f110
|
|
|
|
FR_n_PolyH = f111
|
|
FR_n_PolyL = f112
|
|
|
|
FR_n_Poly1H = f113
|
|
FR_n_SinxH = f113 // the same as FR_n_Poly1H
|
|
FR_n_Poly1L = f114
|
|
FR_n_SinxL = f114 // the same as FR_n_Poly1L
|
|
|
|
FR_n_Tail = f115
|
|
FR_n_NegOne = f116
|
|
|
|
FR_n_Y0 = f117
|
|
|
|
FR_n_Q0 = f118
|
|
FR_n_E0 = f119
|
|
|
|
FR_n_E2 = f120
|
|
FR_n_E1 = f121
|
|
|
|
FR_n_Y1 = f55
|
|
FR_n_E3 = f56
|
|
|
|
FR_n_Y2 = f57
|
|
FR_n_R0 = f58
|
|
|
|
FR_n_E4 = f59
|
|
FR_n_RcpResH = f60
|
|
|
|
FR_n_Y3 = f61
|
|
FR_n_R1 = f62
|
|
FR_n_Temp = f63
|
|
|
|
FR_n_RcpResL = f64
|
|
|
|
FR_n_ResH = f65
|
|
FR_n_ResL = f66
|
|
|
|
|
|
|
|
|
|
//=======================================================
|
|
// Exp registers
|
|
|
|
// General Purpose Registers
|
|
GR_e_ad_Arg = r33
|
|
GR_e_ad_A = r34
|
|
GR_e_signexp_x = r35
|
|
GR_e_exp_x = r35
|
|
GR_e_exp_mask = r36
|
|
GR_e_ad_W1 = r37
|
|
GR_e_ad_W2 = r38
|
|
GR_e_M2 = r39
|
|
GR_e_M1 = r40
|
|
GR_e_K = r41
|
|
GR_e_exp_2_mk = r42
|
|
GR_e_exp_2_k = r43
|
|
GR_e_ad_T1 = r44
|
|
GR_e_ad_T2 = r45
|
|
GR_e_N_fix = r46
|
|
GR_e_one = r47
|
|
GR_e_exp_bias = r48
|
|
GR_e_sig_inv_ln2 = r49
|
|
GR_e_rshf_2to51 = r50
|
|
GR_e_exp_2tom51 = r51
|
|
GR_e_rshf = r52
|
|
|
|
// Floating Point Registers
|
|
FR_e_RSHF_2TO51 = f10
|
|
FR_e_INV_LN2_2TO63 = f11
|
|
FR_e_W_2TO51_RSH = f12
|
|
FR_e_2TOM51 = f13
|
|
FR_e_RSHF = f14
|
|
FR_e_Y_hi = f15
|
|
FR_e_Y_lo = f32
|
|
FR_e_scale = f33
|
|
FR_e_float_N = f34
|
|
FR_e_N_signif = f35
|
|
FR_e_L_hi = f36
|
|
FR_e_L_lo = f37
|
|
FR_e_r = f38
|
|
FR_e_W1 = f39
|
|
FR_e_T1 = f40
|
|
FR_e_W2 = f41
|
|
FR_e_T2 = f42
|
|
FR_e_W1_p1 = f43
|
|
FR_e_rsq = f44
|
|
FR_e_A2 = f45
|
|
FR_e_r4 = f46
|
|
FR_e_A3 = f47
|
|
FR_e_poly = f48
|
|
FR_e_T = f49
|
|
FR_e_W = f50
|
|
FR_e_Wp1 = f51
|
|
FR_e_r6 = f52
|
|
FR_e_2_mk = f53
|
|
FR_e_A1 = f54
|
|
FR_e_T_scale = f55
|
|
FR_e_result_lo = f56
|
|
FR_e_W_T_scale = f57
|
|
FR_e_Wp1_T_scale = f58
|
|
|
|
FR_e_expl_Input_X = f123
|
|
FR_e_expl_Input_Y = f124
|
|
FR_e_expl_Output_X = f123
|
|
FR_e_expl_Output_Y = f124
|
|
|
|
|
|
FR_e_expl_Input_AbsX = f122
|
|
|
|
|
|
|
|
//=======================================================
|
|
// Common registers
|
|
|
|
// General Purpose Registers
|
|
GR_c_Table = r53
|
|
GR_c_NegUnderflow = r54
|
|
GR_c_NegSingularity = r55
|
|
GR_c_X = r56
|
|
GR_c_SignBit = r57
|
|
GR_c_13 = r58
|
|
|
|
|
|
// Floating Point Registers
|
|
FR_c_PosOverflow = f123
|
|
FR_c_XN = f124
|
|
|
|
|
|
//=======================================================
|
|
// Polynomial part registers
|
|
|
|
// General Purpose Registers
|
|
GR_p_Table = r59
|
|
GR_p_XN = r33
|
|
GR_p_Table2 = r34
|
|
GR_p_Int = r35
|
|
GR_p_Offset = r36
|
|
GR_p_Offset2 = r38
|
|
GR_p_X_Sgnd = GR_l_signif_Z // = r37
|
|
GR_p_Exp = r61
|
|
GR_p_Bias = r62
|
|
GR_p_0p75 = r63
|
|
|
|
// Floating Point Registers
|
|
FR_p_AbsX = FR_l_AbsX // = f127
|
|
FR_p_IXN = FR_n_IXN // = f126
|
|
FR_p_XN = f32
|
|
FR_p_0p5 = f33
|
|
FR_p_1p5 = f34
|
|
FR_p_AbsXM1 = f35
|
|
FR_p_2 = f36
|
|
|
|
FR_p_A20 = f37
|
|
FR_p_A19 = f38
|
|
FR_p_A18 = f39
|
|
FR_p_A17 = f40
|
|
FR_p_A16 = f41
|
|
FR_p_A15 = f42
|
|
FR_p_A14 = f43
|
|
FR_p_A13 = f44
|
|
FR_p_A12 = f45
|
|
FR_p_A11 = f46
|
|
FR_p_A10 = f47
|
|
FR_p_A9 = f48
|
|
FR_p_A8 = f49
|
|
FR_p_A7 = f50
|
|
FR_p_A6 = f51
|
|
FR_p_A5H = f52
|
|
FR_p_A5L = f53
|
|
FR_p_A4H = f54
|
|
FR_p_A4L = f55
|
|
FR_p_A3H = f56
|
|
FR_p_A3L = f57
|
|
FR_p_A2H = f58
|
|
FR_p_A2L = f59
|
|
FR_p_A1H = f60
|
|
FR_p_A1L = f61
|
|
FR_p_A0H = f62
|
|
FR_p_A0L = f63
|
|
|
|
FR_p_XR = f64
|
|
FR_p_XR2 = f65
|
|
FR_p_XR2L = f52
|
|
|
|
FR_p_XR3 = f58
|
|
FR_p_XR3L = f38
|
|
|
|
FR_p_XR4 = f42
|
|
FR_p_XR6 = f40
|
|
FR_p_XR8 = f37
|
|
|
|
FR_p_Poly5H = f66
|
|
FR_p_Poly5L = f67
|
|
FR_p_Poly4H = f53
|
|
FR_p_Poly4L = f44
|
|
FR_p_Poly3H = f41
|
|
FR_p_Poly3L = f47
|
|
FR_p_Poly2H = f68
|
|
FR_p_Poly2L = f54
|
|
FR_p_Poly1H = f55
|
|
FR_p_Poly1L = f46
|
|
FR_p_Poly0H = f39
|
|
FR_p_Poly0L = f43
|
|
|
|
FR_p_Temp5H = f69
|
|
FR_p_Temp5L = f70
|
|
FR_p_Temp4H = f71
|
|
FR_p_Temp4L = f60
|
|
FR_p_Temp2H = f72
|
|
FR_p_Temp2L = f73
|
|
FR_p_Temp1H = f59
|
|
FR_p_Temp1L = f61
|
|
FR_p_Temp0H = f49
|
|
FR_p_Temp0L = f48
|
|
FR_p_PolyTail = f45
|
|
FR_p_OddPoly0H = f56
|
|
FR_p_OddPoly0L = f51
|
|
|
|
FR_p_0p25 = f73
|
|
|
|
|
|
//=======================================================
|
|
// Negative polynomial part registers
|
|
// General Purpose Registers
|
|
GR_r_sin_Table = r47
|
|
GR_r_sin_Table2 = r60
|
|
|
|
// Floating Point Registers
|
|
FR_r_IXNS = FR_n_IXNS
|
|
FR_r_IXN = FR_n_IXN
|
|
|
|
FR_r_AbsX = FR_l_AbsX
|
|
|
|
FR_r_A9 = f74
|
|
FR_r_A8 = f75
|
|
FR_r_A7 = f76
|
|
FR_r_A6 = f77
|
|
FR_r_A5 = f78
|
|
FR_r_A4 = f79
|
|
FR_r_A3 = f80
|
|
FR_r_A2H = f81
|
|
FR_r_A2L = f82
|
|
FR_r_A1H = f83
|
|
FR_r_A1L = f84
|
|
|
|
FR_r_XNS = f85
|
|
FR_r_XS = f86
|
|
FR_r_XS2 = f87
|
|
FR_r_XS2L = f88
|
|
FR_r_XS4 = f89
|
|
FR_r_XS7 = f90
|
|
FR_r_XS8 = f91
|
|
|
|
FR_r_Tail = f92
|
|
|
|
FR_r_TT = f93
|
|
FR_r_TH = f94
|
|
FR_r_TL = f95
|
|
|
|
FR_r_ResH = f96
|
|
FR_r_ResL = f97
|
|
|
|
FR_r_Res3H = f98
|
|
FR_r_Res3L = f99
|
|
|
|
FR_r_Res1H = f100
|
|
FR_r_Res1L = f101
|
|
|
|
|
|
|
|
FR_r_Y0 = f102
|
|
FR_r_Q0 = f103
|
|
FR_r_E0 = f104
|
|
FR_r_E2 = f105
|
|
FR_r_E1 = f106
|
|
FR_r_Y1 = f107
|
|
FR_r_E3 = f108
|
|
FR_r_Y2 = f109
|
|
FR_r_R0 = f110
|
|
FR_r_E4 = f111
|
|
FR_r_ZH = f112
|
|
FR_r_Y3 = f113
|
|
FR_r_R1 = f114
|
|
FR_r_ZHN = f115
|
|
FR_r_ZL = f115
|
|
FR_r_NegOne = f116
|
|
|
|
FR_z_Y0 = f102
|
|
FR_z_Q0 = f103
|
|
FR_z_E0 = f104
|
|
FR_z_E2 = f105
|
|
FR_z_E1 = f106
|
|
FR_z_Y1 = f107
|
|
FR_z_E3 = f108
|
|
FR_z_Y2 = f109
|
|
FR_z_R0 = f110
|
|
FR_z_E4 = f111
|
|
FR_z_ZH = f112
|
|
FR_z_Y3 = f113
|
|
FR_z_R1 = f114
|
|
FR_z_ZL = f115
|
|
|
|
|
|
// General Purpose Registers
|
|
GR_SAVE_PFS = r32
|
|
GR_DenOverflow = r33
|
|
GR_u_XN = r34
|
|
|
|
GR_SAVE_B0 = r35
|
|
GR_SAVE_GP = r36
|
|
GR_SAVE_SP = r37
|
|
|
|
// Floating Point Registers
|
|
FR_u_IXN = f34
|
|
|
|
|
|
// ERROR HANDLER REGISTERS
|
|
GR_Parameter_X = r64
|
|
GR_Parameter_Y = r65
|
|
GR_Parameter_RESULT = r66
|
|
GR_Parameter_TAG = r67
|
|
|
|
FR_RESULT = f8
|
|
FR_X = f32
|
|
FR_Y = f1
|
|
|
|
|
|
.section .text
|
|
GLOBAL_LIBM_ENTRY(tgammal)
|
|
{ .mfi
|
|
alloc r32 = ar.pfs,0,32,4,0
|
|
fabs FR_l_AbsX = f8 // Get absolute value of X
|
|
addl GR_n_sin_Table = @ltoff(Constants_Tgammal_sin), gp
|
|
}
|
|
{ .mfi
|
|
addl GR_l_Log_Table=@ltoff(Constants_Tgammal_log_80_Z_G_H_h1#),gp
|
|
nop.f 0
|
|
addl GR_l_Stirling_Table = @ltoff(Constants_Tgammal_stirling), gp
|
|
};;
|
|
|
|
{ .mfi
|
|
getf.sig GR_l_signif_Z = f8 // Significand of X
|
|
fcvt.fx.s1 FR_n_IXNS = f8 // Convert to fixed point
|
|
addl GR_c_Table = @ltoff(Constants_Tgammal_common), gp
|
|
}
|
|
{ .mfi
|
|
ld8 GR_l_Log_Table = [GR_l_Log_Table]
|
|
nop.f 0
|
|
addl GR_p_Table = @ltoff(Constants_Tgammal_poly), gp
|
|
};;
|
|
|
|
{ .mfi
|
|
ld8 GR_n_sin_Table = [GR_n_sin_Table]
|
|
fclass.m p6,p0 = f8,0x1EF // Check x for NaN, 0, INF, denorm
|
|
// NatVal.
|
|
addl GR_c_NegSingularity = 0x1003E, r0
|
|
}
|
|
{ .mlx
|
|
ld8 GR_l_Stirling_Table = [GR_l_Stirling_Table]
|
|
movl GR_c_13 = 0x402A000000000000 // 13.0
|
|
};;
|
|
|
|
{ .mfi
|
|
getf.d GR_c_X = f8 // Double prec. X to general register
|
|
frcpa.s1 FR_z_Y0,p0 = f1,f8 // y = frcpa(x) (for negatives)
|
|
extr.u GR_l_Index1 = GR_l_signif_Z, 59, 4 // = High 4 bits of Z
|
|
}
|
|
{ .mlx
|
|
ld8 GR_c_Table = [GR_c_Table]
|
|
movl GR_c_SignBit = 0x8000000000000000 // High bit (sign)
|
|
};;
|
|
|
|
{ .mfi
|
|
ld8 GR_p_Table = [GR_p_Table]
|
|
fcmp.lt.s1 p15, p14 = f8,f0 // p14 - positive arg, p15 - negative
|
|
shl GR_l_Index1 = GR_l_Index1,5 // Adjust Index1 ptr (x32)
|
|
}
|
|
{ .mfb
|
|
adds GR_c_NegUnderflow = 1765, r0
|
|
nop.f 0
|
|
(p6) br.cond.spnt tgammal_spec // Spec. values processing branch ////////////
|
|
// (0s, INFs, NANs, NatVals, denormals) //////
|
|
};;
|
|
|
|
{ .mfi
|
|
ldfpd FR_l_CH,FR_l_CL= [GR_l_Stirling_Table], 16 // Load CH, CL
|
|
fcvt.fx.trunc.s1 FR_n_IXN = FR_l_AbsX // Abs arg to int by trunc
|
|
extr.u GR_l_X_0 = GR_l_signif_Z, 49, 15 // High 15 bit of Z
|
|
}
|
|
{ .mfi
|
|
add GR_l_Index1 = GR_l_Index1,GR_l_Log_Table // Add offset
|
|
fma.s1 FR_p_2 = f1, f1, f1 // 2.0
|
|
andcm GR_c_X = GR_c_X, GR_c_SignBit // Remove sign
|
|
};;
|
|
|
|
{ .mfi
|
|
addl GR_l_Log_Table = @ltoff(Constants_Tgammal_log_80_Z_G_H_h2#), gp
|
|
fcmp.lt.s1 p10, p0 = FR_l_AbsX, f1 // If |X|<1 then p10 = 1
|
|
nop.i 0
|
|
}
|
|
{ .mlx
|
|
ld2 GR_l_Z_1 = [GR_l_Index1],4 // load Z_1 from Index1
|
|
movl GR_l_BIAS = 0x000000000000FFFF // Bias for exponent
|
|
};;
|
|
|
|
{ .mfi
|
|
ld8 GR_l_Log_Table = [GR_l_Log_Table]
|
|
frcpa.s1 FR_l_Y0, p0 = f1, FR_l_AbsX // y = frcpa(x)
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
ldfs FR_l_G_1 = [GR_l_Index1],4 // Load G_1
|
|
fsub.s1 FR_l_W = FR_l_AbsX, f1 // W = |X|-1
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
getf.exp GR_l_N_Unbiased= FR_l_AbsX // exponent of |X|
|
|
fmerge.se FR_l_S = f1, FR_l_AbsX // S = merging of X and 1.0
|
|
cmp.gtu p11, p0 = GR_c_13, GR_c_X // If 1 <= |X| < 13
|
|
// then p11 = 1
|
|
}
|
|
{ .mfb
|
|
ldfs FR_l_H_1 = [GR_l_Index1],8 // Load H_1
|
|
fcvt.xf FR_n_XNS = FR_n_IXNS // Convert to FP repr. of int X
|
|
(p10) br.cond.spnt tgamma_lt_1 // Branch to |X| < 1 path ///////////////////
|
|
};;
|
|
|
|
{ .mfi
|
|
ldfpd FR_n_A2H, FR_n_A2L = [GR_n_sin_Table], 16
|
|
nop.f 0
|
|
pmpyshr2.u GR_l_X_1 = GR_l_X_0,GR_l_Z_1,15 // Adjust Index2 (x32)
|
|
}
|
|
{ .mfb
|
|
ldfe FR_l_B2 = [GR_l_Stirling_Table], 16
|
|
nop.f 0
|
|
(p11) br.cond.spnt tgamma_lt_13 // Branch to 1 <= |X| < 13 path ///////////////
|
|
};;
|
|
|
|
{ .mfi
|
|
ldfe FR_l_h_1 = [GR_l_Index1],0
|
|
nop.f 0
|
|
sub GR_l_N = GR_l_N_Unbiased, GR_l_BIAS // N - BIAS
|
|
}
|
|
{ .mib
|
|
ldfpd FR_l_B4,FR_l_B6= [GR_l_Stirling_Table], 16 // Load C
|
|
(p15) cmp.geu.unc p8,p0 = GR_l_N_Unbiased, GR_c_NegSingularity
|
|
(p8) br.cond.spnt tgammal_singularity // Singularity for arg < to -2^63 //////
|
|
};;
|
|
|
|
{ .mmi
|
|
(p15) ldfpd FR_n_A1H, FR_n_A1L = [GR_n_sin_Table], 16
|
|
ldfpd FR_l_B8, FR_l_B10 = [GR_l_Stirling_Table], 16
|
|
add GR_c_Table = 0x20, GR_c_Table
|
|
};;
|
|
|
|
{ .mfi
|
|
(p15) ldfe FR_n_A9 = [GR_n_sin_Table], 16
|
|
fma.s1 FR_l_Q0 = f1,FR_l_Y0,f0 // Q0 = Y0
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
ldfpd FR_l_B12, FR_l_B14 = [GR_l_Stirling_Table], 16
|
|
fnma.s1 FR_l_E0 = FR_l_Y0,FR_l_AbsX,f1 // e = 1-b*y
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
(p15) ldfe FR_n_A8 = [GR_n_sin_Table], 16
|
|
fcvt.xf FR_c_XN = FR_n_IXN // Convert to FP repr. of int X
|
|
extr.u GR_l_Index2 = GR_l_X_1, 6, 4 // Extract Index2
|
|
}
|
|
{ .mfi
|
|
ldfpd FR_l_B16, FR_l_B18 = [GR_l_Stirling_Table], 16
|
|
nop.f 0
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
(p15) ldfe FR_n_A7 = [GR_n_sin_Table], 16
|
|
fms.s1 FR_l_CXH = FR_l_CH, f1, FR_l_AbsX // CXH = CH+|X|
|
|
shl GR_l_Index2 = GR_l_Index2,5
|
|
}
|
|
{ .mfi
|
|
ldfd FR_l_Half = [GR_l_Stirling_Table] // Load 0.5
|
|
nop.f 0
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
add GR_l_Index2 = GR_l_Index2, GR_l_Log_Table // Add offset
|
|
nop.f 0
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
(p15) ldfe FR_n_A6 = [GR_n_sin_Table], 16
|
|
(p15) fma.s1 FR_n_XS = FR_l_AbsX , f1, FR_n_XNS // xs = x - int(x)
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mmi
|
|
ld2 GR_l_Z_2 = [GR_l_Index2],4
|
|
addl GR_l_Log_Table = @ltoff(Constants_Tgammal_log_80_h3_G_H#),gp
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
ld8 GR_l_Log_Table = [GR_l_Log_Table]
|
|
fma.s1 FR_l_E2 = FR_l_E0,FR_l_E0,FR_l_E0 // e2 = e+e^2
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
ldfs FR_l_G_2 = [GR_l_Index2],4
|
|
fma.s1 FR_l_E1 = FR_l_E0,FR_l_E0,f0 // e1 = e^2
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mmi
|
|
ldfs FR_l_H_2 = [GR_l_Index2],8
|
|
(p15) ldfe FR_n_A5 = [GR_n_sin_Table], 16
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
setf.sig FR_l_float_N = GR_l_N // float_N = Make N a fp number
|
|
nop.f 0
|
|
pmpyshr2.u GR_l_X_2 = GR_l_X_1,GR_l_Z_2,15 // X_2 = X_1 * Z_2
|
|
}
|
|
{ .mfi
|
|
ldfe FR_l_h_2 = [GR_l_Index2],0
|
|
fma.s1 FR_l_CXL = FR_l_AbsX, f1, FR_l_CXH // CXL = |X|+CXH
|
|
add GR_l_Log_Table1= 0x200, GR_l_Log_Table
|
|
};;
|
|
|
|
{ .mfi
|
|
(p15) ldfe FR_n_A4 = [GR_n_sin_Table], 16
|
|
(p15) fcmp.eq.unc.s1 p9,p0 = FR_l_AbsX, FR_c_XN //if argument is integer
|
|
// and negative
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
ldfe FR_c_PosOverflow = [GR_c_Table],16 //Load pos overflow value
|
|
(p15) fma.s1 FR_n_XS2 = FR_n_XS, FR_n_XS, f0 // xs^2 = xs*xs
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
(p15) ldfe FR_n_A3 = [GR_n_sin_Table], 16
|
|
nop.f 0
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
(p15) getf.sig GR_n_XN = FR_n_IXN // int(x) to general reg
|
|
fma.s1 FR_l_Y1 = FR_l_Y0,FR_l_E2,FR_l_Y0 // y1 = y+y*e2
|
|
nop.i 0
|
|
}
|
|
{ .mfb
|
|
nop.m 0
|
|
fma.s1 FR_l_E3 = FR_l_E1,FR_l_E1,FR_l_E0 // e3 = e+e1^2
|
|
(p9) br.cond.spnt tgammal_singularity // Singularity for integer /////////////
|
|
// and negative arguments //////////////
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_l_AbsX_m_Half = FR_l_AbsX, f1, FR_l_Half // |x|-0.5
|
|
extr.u GR_l_Index2 = GR_l_X_2, 1, 5 // Get Index3
|
|
};;
|
|
|
|
{ .mfi
|
|
shladd GR_l_Log_Table1= GR_l_Index2, 2, GR_l_Log_Table1
|
|
nop.f 0
|
|
shladd GR_l_Index3 = GR_l_Index2,4, GR_l_Log_Table // Index3
|
|
}
|
|
{ .mfb
|
|
(p15) cmp.gtu.unc p11, p0 = GR_n_XN, GR_c_NegUnderflow // X < -1765
|
|
fms.s1 FR_l_CXL = FR_l_CH, f1, FR_l_CXL // CXL = CH - CXL
|
|
(p11) br.cond.spnt tgammal_underflow // Singularity for negative argument //////
|
|
// at underflow domain (X < -1765) //////
|
|
};;
|
|
|
|
{ .mfi
|
|
addl GR_l_Log_Table = @ltoff(Constants_Tgammal_log_80_Q#), gp
|
|
(p15) fma.s1 FR_n_TT = FR_n_A2L, FR_n_XS2, f0 // T=A2L*x^2
|
|
tbit.nz.unc p13, p12 = GR_n_XN, 0x0 // whether [X] odd or even
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
(p15) fms.s1 FR_n_XS2L = FR_n_XS, FR_n_XS, FR_n_XS2 // xs^2 Low part
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
ld8 GR_l_Log_Table = [GR_l_Log_Table]
|
|
(p15) fma.s1 FR_n_A7 = FR_n_A8, FR_n_XS2, FR_n_A7 // poly tail
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
ldfe FR_l_h_3 = [GR_l_Index3],12
|
|
(p15) fma.s1 FR_n_XS4 = FR_n_XS2, FR_n_XS2, f0 // xs^4 = xs^2*xs^2
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
ldfs FR_l_H_3 = [GR_l_Log_Table1], 0
|
|
fma.s1 FR_l_Y2 = FR_l_Y1, FR_l_E3, FR_l_Y0 // y2 = y+y1*e3
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
ldfs FR_l_G_3 = [GR_l_Index3], 0
|
|
fnma.s1 FR_l_Z = FR_l_AbsX,FR_l_Q0,f1 // r = a-b*q
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fmpy.s1 FR_l_G = FR_l_G_1, FR_l_G_2 // G = G1 * G_2
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fadd.s1 FR_l_H = FR_l_H_1, FR_l_H_2 // H = H_1 + H_2
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
ldfe FR_l_log2_hi = [GR_l_Log_Table],16 // load log2_hi part
|
|
fadd.s1 FR_l_h = FR_l_h_1, FR_l_h_2 // h = h_1 + h_2
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fcvt.xf FR_l_float_N = FR_l_float_N // int(N)
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
ldfe FR_l_log2_lo = [GR_l_Log_Table],16 // Load log2_lo part
|
|
fma.s1 FR_l_CXL = FR_l_CXL, f1, FR_l_CL
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
(p15) fma.s1 FR_n_TT = FR_n_A2H, FR_n_XS2L, FR_n_TT // T=A2H*x2L+T
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
ldfe FR_l_Q_6 = [GR_l_Log_Table],16
|
|
(p15) fma.s1 FR_n_A3 = FR_n_A4, FR_n_XS2, FR_n_A3 // poly tail
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
(p15) fma.s1 FR_n_A5 = FR_n_A6, FR_n_XS2, FR_n_A5 // poly tail
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
ldfe FR_l_Q_5 = [GR_l_Log_Table],16
|
|
(p15) fabs FR_n_XS = FR_n_XS // abs(xs)
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_l_Z = FR_l_Z,FR_l_Y2,FR_l_Q0 // x_hi = q+r*y2
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
ldfe FR_l_Q_4 = [GR_l_Log_Table],16
|
|
(p15) fma.s1 FR_n_A7 = FR_n_A9, FR_n_XS4, FR_n_A7 // poly tail
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
(p15) fma.s1 FR_n_XS7 = FR_n_XS4, FR_n_XS2, f0 // = x^4*x^2
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
ldfe FR_l_Q_3 = [GR_l_Log_Table],16
|
|
fneg FR_n_NegOne = f1 // -1.0
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
(p15) fma.s1 FR_n_XS8 = FR_n_XS4, FR_n_XS4, f0 // xs^8 = xs^4*xs^4
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
ldfe FR_l_Q_2 = [GR_l_Log_Table],16
|
|
fadd.s1 FR_l_h = FR_l_h, FR_l_h_3 // h = h_1 + h_2 + h_3
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
(p15) fma.s1 FR_n_TH = FR_n_A2H, FR_n_XS2, FR_n_TT // A2H*xs2+T
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
ldfe FR_l_Q_1 = [GR_l_Log_Table],16
|
|
fmpy.s1 FR_l_G = FR_l_G, FR_l_G_3 // G = G_1 * G_2 * G_3
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fadd.s1 FR_l_H = FR_l_H, FR_l_H_3 // H = H_1 + H_2 + H_3
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_l_Z2 = FR_l_Z, FR_l_Z, f0 // Z^2
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
(p15) fma.s1 FR_n_A3 = FR_n_A5, FR_n_XS4, FR_n_A3 // poly tail
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
(p14) fcmp.gt.unc.s1 p7,p0 = FR_l_AbsX, FR_c_PosOverflow //X > 1755.5483
|
|
// (overflow domain, result cannot be represented by normal value)
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
(p15) fma.s1 FR_n_XS7 = FR_n_XS7, FR_n_XS, f0 // x^7 construction
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
(p15) fms.s1 FR_n_TL = FR_n_A2H, FR_n_XS2, FR_n_TH // A2H*xs2+TH
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
(p15) fma.s1 FR_n_PolyH = FR_n_TH, f1, FR_n_A1H // PolyH=TH+A1H
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fmpy.s1 FR_l_GS_hi = FR_l_G, FR_l_S // GS_hi = G*S
|
|
nop.i 0
|
|
}
|
|
{ .mfb
|
|
nop.m 0
|
|
fms.s1 FR_l_r = FR_l_G, FR_l_S, f1 // r = G*S -1
|
|
(p7) br.cond.spnt tgammal_overflow // Overflow path for arg > 1755.5483 //////
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_l_B14 = FR_l_B16, FR_l_Z2, FR_l_B14// Bernoulli tail
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_l_Z4 = FR_l_Z2, FR_l_Z2, f0 // Z^4 = Z^2*Z^2
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_l_B2 = FR_l_B4, FR_l_Z2, FR_l_B2 // Bernoulli tail
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_l_B6 = FR_l_B8, FR_l_Z2, FR_l_B6 // Bernoulli tail
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_l_B10 = FR_l_B12, FR_l_Z2, FR_l_B10// Bernoulli tail
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
(p15) fma.s1 FR_n_Tail = FR_n_A7, FR_n_XS8, FR_n_A3 // poly tail
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
(p15) fma.s1 FR_n_TL = FR_n_TL, f1, FR_n_TT // TL = TL+T
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
(p15) fms.s1 FR_n_PolyL = FR_n_A1H, f1, FR_n_PolyH // polyH+A1H
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_l_poly_lo = FR_l_r, FR_l_Q_6, FR_l_Q_5 // Q_5+r*Q_6
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fsub.s1 FR_l_r_cor = FR_l_GS_hi, f1 // r_cor = GS_hi -1
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_l_GS_lo = FR_l_G, FR_l_S, FR_l_GS_hi // G*S-GS_hi
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_l_poly = FR_l_r, FR_l_Q_2, FR_l_Q_1 //poly=r*Q2+Q1
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fmpy.s1 FR_l_rsq = FR_l_r, FR_l_r // rsq = r * r
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_l_G = FR_l_float_N, FR_l_log2_hi, FR_l_H // Tbl =
|
|
// float_N*log2_hi + H
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_l_Y_lo = FR_l_float_N, FR_l_log2_lo, FR_l_h // Y_lo=
|
|
// float_N*log2_lo + h
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_l_B14 = FR_l_B18, FR_l_Z4, FR_l_B14 //bernulli tail
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_l_B2 = FR_l_B6, FR_l_Z4, FR_l_B2 //bernulli tail
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_l_Z8 = FR_l_Z4, FR_l_Z4, f0 //bernulli tail
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_l_poly_lo = FR_l_r, FR_l_poly_lo, FR_l_Q_4 // poly_lo =
|
|
// Q_4 + r * poly_lo
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fsub.s1 FR_l_r_cor = FR_l_r_cor, FR_l_r // r_cor = r_cor - r
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
(p15) fma.s1 FR_n_PolyL = FR_n_PolyL, f1, FR_n_TH // polyL+TH
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
(p15) fma.s1 FR_n_TT = FR_n_TL, f1, FR_n_A1L // TL+A1L
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fadd.s1 FR_l_logl_YHi = FR_l_G, FR_l_r // Y_hi = Tbl + r
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_l_B10 = FR_l_B14, FR_l_Z4, FR_l_B10 //bernulli tail
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_l_poly_lo = FR_l_r, FR_l_poly_lo, FR_l_Q_3 // poly_lo =
|
|
// Q_3 + r * poly_lo
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fadd.s1 FR_l_r_cor = FR_l_r_cor, FR_l_GS_lo // r_cor=r_cor+GS_lo
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
(p15) fma.s1 FR_n_PolyL = FR_n_PolyL, f1, FR_n_TT // polyL+TT
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fsub.s1 FR_l_Y_lo_res = FR_l_G, FR_l_logl_YHi // Y_lo = Tbl - Y_hi
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_l_XYH = FR_l_logl_YHi, FR_l_AbsX_m_Half, f0 // XYH=
|
|
// YHi*|x-0.5|
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_l_SS = FR_l_B10, FR_l_Z8, FR_l_B2 // Bernoulli tail
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fadd.s1 FR_l_r_cor = FR_l_r_cor, FR_l_Y_lo // r_cor = r_cor+Y_lo
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_l_poly = FR_l_rsq, FR_l_poly_lo, FR_l_poly //poly=
|
|
// r^2*polyLo+poly
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
(p15) fma.s1 FR_n_TT = FR_n_PolyL, FR_n_XS2, f0 // T=polyL*xs^2
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fadd.s1 FR_l_Y_lo = FR_l_Y_lo_res, FR_l_r // Y_lo = Y_lo + r
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_l_XYL = FR_l_logl_YHi, FR_l_AbsX_m_Half, FR_l_XYH
|
|
// XYL = YHi*|x-0.5|-XYH
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_l_SSCXH = FR_l_SS, FR_l_Z, FR_l_CXH // SS*Z+CXH
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
mov GR_e_exp_2tom51= 0xffff-51 // 2^-51
|
|
(p15) fma.s1 FR_l_SignedXYH = FR_l_XYH, FR_n_NegOne, f0 // XYH = -XYH
|
|
// for negatives
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mlx
|
|
nop.m 0
|
|
movl GR_e_rshf_2to51 = 0x4718000000000000 // 1.10000 2^(63+51)
|
|
}
|
|
{ .mlx
|
|
nop.m 0
|
|
movl GR_e_sig_inv_ln2 = 0xb8aa3b295c17f0bc //significand of 1/ln2
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_l_poly = FR_l_rsq, FR_l_poly, FR_l_r_cor // poly =
|
|
// rsq * poly + r_cor
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
addl GR_e_ad_Arg = @ltoff(Constants_Tgammal_exp_64_Arg#),gp
|
|
(p15) fma.s1 FR_n_TT = FR_n_PolyH, FR_n_XS2L, FR_n_TT
|
|
mov GR_e_exp_mask = 0x1FFFF // Form exponent mask
|
|
}
|
|
{ .mlx
|
|
nop.m 0
|
|
movl GR_e_rshf = 0x43e8000000000000 // 1.10000 2^63 rshift
|
|
};;
|
|
|
|
|
|
{ .mmi
|
|
setf.sig FR_e_INV_LN2_2TO63 = GR_e_sig_inv_ln2 // form 1/ln2 * 2^63
|
|
setf.d FR_e_RSHF_2TO51 = GR_e_rshf_2to51 // 1.1000 * 2^(63+51)
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_l_SSCXL = FR_l_CXH, f1, FR_l_SSCXH // CXH+SS*CXH
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_e_expl_Input_AbsX = FR_l_XYH, f1, FR_l_SSCXH // HI EXP
|
|
nop.i 0
|
|
};;
|
|
|
|
.pred.rel "mutex",p14,p15
|
|
{ .mfi
|
|
nop.m 0
|
|
(p14) fma.s1 FR_e_expl_Input_X = FR_l_XYH, f1, FR_l_SSCXH // HI EXP
|
|
mov GR_e_exp_bias = 0x0FFFF // Set exponent bias
|
|
}
|
|
{ .mfi
|
|
ld8 GR_e_ad_Arg = [GR_e_ad_Arg] // Point to Arg table
|
|
(p15) fms.s1 FR_e_expl_Input_X = FR_l_SignedXYH, f1, FR_l_SSCXH // HI EXP
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fadd.s1 FR_l_logl_YLo = FR_l_Y_lo, FR_l_poly // YLo = YLo+poly
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
setf.exp FR_e_2TOM51 = GR_e_exp_2tom51 //2^-51 for scaling float_N
|
|
(p15) fma.s1 FR_n_TH = FR_n_PolyH, FR_n_XS2, FR_n_TT // TH=
|
|
// polyH*xs^2+T
|
|
nop.i 0
|
|
}
|
|
{ .mib
|
|
setf.d FR_e_RSHF = GR_e_rshf // Right shift const 1.1000*2^63
|
|
nop.i 0
|
|
nop.b 0
|
|
};;
|
|
|
|
{ .mfi
|
|
add GR_e_ad_A = 0x20, GR_e_ad_Arg // Point to A table
|
|
nop.f 0
|
|
add GR_e_ad_T1 = 0x50, GR_e_ad_Arg // Point to T1 table
|
|
}
|
|
{ .mfi
|
|
add GR_e_ad_T2 = 0x150, GR_e_ad_Arg // Point to T2 table
|
|
nop.f 0
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_l_SSCXL = FR_l_SS, FR_l_Z, FR_l_SSCXL
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_e_expl_Input_Y = FR_l_XYH, f1, FR_e_expl_Input_AbsX
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
ldfe FR_e_L_hi = [GR_e_ad_Arg],16 // Get L_hi
|
|
nop.f 0
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_l_XYL = FR_l_logl_YLo, FR_l_AbsX_m_Half, FR_l_XYL
|
|
// XYL = YLo*|x-0.5|+XYL
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
ldfe FR_e_L_lo = [GR_e_ad_Arg],16 // Get L_lo
|
|
(p15) fms.s1 FR_n_TL = FR_n_PolyH, FR_n_XS2, FR_n_TH // TL =
|
|
// = polyH*xs^2-TH
|
|
add GR_e_ad_W1 = 0x100, GR_e_ad_T2 // Point to W1 table
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
(p15) fma.s1 FR_n_Poly1H = FR_n_TH, f1, f1 // poly1H = TH+1
|
|
add GR_e_ad_W2 = 0x300, GR_e_ad_T2 // Point to W2 table
|
|
};;
|
|
|
|
{ .mmi
|
|
getf.exp GR_e_signexp_x = FR_e_expl_Input_X // Extract sign and exp
|
|
ldfe FR_e_A3 = [GR_e_ad_A],16 // Get A3
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_l_SSCXL = FR_l_SSCXL, f1, FR_l_CXL
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_e_expl_Input_Y = FR_e_expl_Input_Y, f1, FR_l_SSCXH
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_e_N_signif=FR_e_expl_Input_X,FR_e_INV_LN2_2TO63,FR_e_RSHF_2TO51
|
|
and GR_e_exp_x = GR_e_signexp_x, GR_e_exp_mask
|
|
};;
|
|
|
|
{ .mmi
|
|
sub GR_e_exp_x = GR_e_exp_x, GR_e_exp_bias // Get exponent
|
|
ldfe FR_e_A2 = [GR_e_ad_A],16 // Get A2 for main path
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
(p15) fma.s1 FR_n_PolyH = FR_n_Poly1H, FR_n_XS, f0//sin(Pi*x) poly
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
(p15) fms.s1 FR_n_Poly1L = f1, f1, FR_n_Poly1H//sin(Pi*x) poly
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
(p15) fma.s1 FR_n_TL = FR_n_TL, f1, FR_n_TT//sin(Pi*x) poly
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_l_Temp = FR_l_XYL, f1, FR_l_SSCXL // XYL+SS*CXL
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
(p15) fma.s1 FR_e_expl_Input_Y = FR_e_expl_Input_Y, FR_n_NegOne, f0
|
|
// Negate lo part of exp argument for negative input values
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
ldfe FR_e_A1 = [GR_e_ad_A],16 // Get A1
|
|
nop.f 0
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_e_float_N = FR_e_N_signif, FR_e_2TOM51, FR_e_RSHF
|
|
// Get float N = signd*2^51-RSHIFTER
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
(p15) fma.s1 FR_n_Poly1L = FR_n_Poly1L, f1, FR_n_TH //sin(Pi*x) poly
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
(p15) fms.s1 FR_n_PolyL = FR_n_Poly1H, FR_n_XS, FR_n_PolyH//sin(Pi*x)
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
getf.sig GR_e_N_fix = FR_e_N_signif // Get N from significand
|
|
nop.f 0
|
|
nop.i 0
|
|
};;
|
|
|
|
.pred.rel "mutex",p14,p15
|
|
{ .mfi
|
|
nop.m 0
|
|
(p14) fma.s1 FR_e_expl_Input_Y = FR_e_expl_Input_Y, f1, FR_l_Temp
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
(p15) fms.s1 FR_e_expl_Input_Y = FR_e_expl_Input_Y, f1, FR_l_Temp
|
|
// arguments for exp computation
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fnma.s1 FR_e_r = FR_e_L_hi, FR_e_float_N, FR_e_expl_Input_X
|
|
// r = -L_hi * float_N + x
|
|
extr.u GR_e_M1 = GR_e_N_fix, 6, 6 // Extract index M_1
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
(p15) fma.s1 FR_n_Poly1L = FR_n_Poly1L, f1, FR_n_TL //sin(Pi*x) poly
|
|
nop.i 0
|
|
};;
|
|
|
|
|
|
{ .mmf
|
|
nop.m 0
|
|
nop.m 0
|
|
fma.s1 FR_e_r = FR_e_r, f1, FR_e_expl_Input_Y
|
|
// r = r + FR_e_expl_Input_Y
|
|
};;
|
|
|
|
{ .mmi
|
|
shladd GR_e_ad_W1 = GR_e_M1,3,GR_e_ad_W1 // Point to W1
|
|
shladd GR_e_ad_T1 = GR_e_M1,2,GR_e_ad_T1 // Point to T1
|
|
extr.u GR_e_M2 = GR_e_N_fix, 0, 6 // Extract index M_2
|
|
};;
|
|
|
|
|
|
{ .mfi
|
|
ldfs FR_e_T1 = [GR_e_ad_T1],0 // Get T1
|
|
nop.f 0
|
|
extr GR_e_K = GR_e_N_fix, 12, 32 //Extract limit range K
|
|
}
|
|
{ .mfi
|
|
shladd GR_e_ad_T2 = GR_e_M2,2,GR_e_ad_T2 // Point to T2
|
|
(p15) fma.s1 FR_n_PolyL = FR_n_Poly1L, FR_n_XS, FR_n_PolyL
|
|
//sin(Pi*x) poly
|
|
shladd GR_e_ad_W2 = GR_e_M2,3,GR_e_ad_W2 // Point to W2
|
|
};;
|
|
|
|
{ .mfi
|
|
ldfs FR_e_T2 = [GR_e_ad_T2],0 // Get T2
|
|
nop.f 0
|
|
add GR_e_exp_2_k = GR_e_exp_bias, GR_e_K // exp of 2^k
|
|
}
|
|
{ .mfi
|
|
ldfd FR_e_W1 = [GR_e_ad_W1],0 // Get W1
|
|
nop.f 0
|
|
sub GR_e_exp_2_mk = GR_e_exp_bias, GR_e_K // exp of 2^-k
|
|
};;
|
|
|
|
{ .mmi
|
|
ldfd FR_e_W2 = [GR_e_ad_W2],0 // Get W2
|
|
nop.m 0
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mmf
|
|
setf.exp FR_e_scale = GR_e_exp_2_k // Set scale = 2^k
|
|
setf.exp FR_e_2_mk = GR_e_exp_2_mk // Form 2^-k
|
|
fnma.s1 FR_e_r = FR_e_L_lo, FR_e_float_N, FR_e_r
|
|
// r = -L_lo * float_N + r
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
(p15) fma.s1 FR_n_PolyL = FR_n_Tail, FR_n_XS7, FR_n_PolyL
|
|
//sin(Pi*x) poly
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_e_poly = FR_e_r, FR_e_A3, FR_e_A2 // poly=r*A3+A2
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fmpy.s1 FR_e_rsq = FR_e_r, FR_e_r // rsq = r * r
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fmpy.s1 FR_e_T = FR_e_T1, FR_e_T2 // T = T1 * T2
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fadd.s1 FR_e_W1_p1 = FR_e_W1, f1 // W1_p1 = W1 + 1.0
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
(p15) fma.s1 FR_n_TT = FR_n_PolyL, FR_l_AbsX, f0 //sin(Pi*x) poly
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_e_poly = FR_e_r, FR_e_poly, FR_e_A1
|
|
// poly = r * poly + A1
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_e_T_scale = FR_e_T, FR_e_scale, f0 // T_scale=T*scale
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_e_W = FR_e_W2, FR_e_W1_p1, FR_e_W1
|
|
// W = W2 * (W1+1.0) + W1
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
(p15) fma.s1 FR_n_SinxH = FR_n_PolyH, FR_l_AbsX, FR_n_TT
|
|
// sin(Pi*x) poly
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
mov FR_e_Y_hi = FR_e_T // Assume Y_hi = T
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_e_poly = FR_e_rsq, FR_e_poly, FR_e_r
|
|
// poly = rsq * poly + r
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_e_Wp1_T_scale = FR_e_W, FR_e_T_scale, FR_e_T_scale
|
|
// (W+1)*T*scale
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_e_W_T_scale = FR_e_W, FR_e_T_scale, f0 // W*T*scale
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
(p15) fms.s1 FR_n_SinxL = FR_n_PolyH, FR_l_AbsX, FR_n_SinxH
|
|
// Low part of sin
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
(p15) frcpa.s1 FR_n_Y0, p0 = f1, FR_n_SinxH // y = frcpa(b)
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_e_result_lo = FR_e_Wp1_T_scale, FR_e_poly, FR_e_W_T_scale
|
|
// Low part of exp result
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
(p15) fma.s1 FR_n_SinxL = FR_n_SinxL, f1, FR_n_TT // sin low result
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
(p15) fma.s1 FR_n_Q0 = f1,FR_n_Y0,f0 // q = y
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
(p15) fnma.s1 FR_n_E0 = FR_n_Y0, FR_n_SinxH, f1 // e = 1-b*y
|
|
nop.i 0
|
|
};;
|
|
|
|
|
|
{ .mfb
|
|
nop.m 0
|
|
(p14) fma.s0 f8 = FR_e_Y_hi, FR_e_scale, FR_e_result_lo
|
|
(p14) br.ret.spnt b0 // Exit for positive Stirling path //////////////////////
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_e_expl_Output_X = FR_e_Y_hi, FR_e_scale, f0 // exp result
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_e_expl_Output_Y = FR_e_result_lo, f1, f0// exp lo result
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_n_E2 = FR_n_E0,FR_n_E0,FR_n_E0 // e2 = e+e^2
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_n_E1 = FR_n_E0,FR_n_E0,f0 // e1 = e^2
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_n_Y1 = FR_n_Y0,FR_n_E2,FR_n_Y0 // y1 = y+y*e2
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_n_E3 = FR_n_E1,FR_n_E1,FR_n_E0 // e3 = e+e1^2
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_n_Y2 = FR_n_Y1,FR_n_E3,FR_n_Y0 // y2 = y+y1*e3
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fnma.s1 FR_n_R0 = FR_n_SinxH,FR_n_Q0,f1 // r = a-b*q
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fnma.s1 FR_n_E4 = FR_n_SinxH,FR_n_Y2,f1 // e4 = 1-b*y2
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_n_RcpResH = FR_n_R0,FR_n_Y2,FR_n_Q0 // x = q+r*y2
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_n_Y3 = FR_n_Y2,FR_n_E4,FR_n_Y2 // y3 = y2+y2*e4
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fnma.s1 FR_n_R1 = FR_n_SinxH,FR_n_RcpResH,f1 // r1 = a-b*x
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fnma.s1 FR_n_R1 = FR_n_SinxL,FR_n_RcpResH,FR_n_R1
|
|
// r1 = r1 - b_lo*X
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_n_RcpResL = FR_n_R1,FR_n_Y3,f0 // x_lo = r1*y3
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_n_Temp = FR_n_RcpResH, FR_e_expl_Output_Y, f0
|
|
// Multiplying exp and sin result
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_n_Temp = FR_n_RcpResL, FR_e_expl_Output_X, FR_n_Temp
|
|
// Multiplying exp and sin result
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_n_ResH = FR_n_RcpResH, FR_e_expl_Output_X, FR_n_Temp
|
|
// Multiplying exp and sin result
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_n_ResL = FR_n_RcpResH, FR_e_expl_Output_X, FR_n_ResH
|
|
// Multiplying exp and sin result
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
(p12) fma.s1 FR_n_ResH = FR_n_ResH, FR_n_NegOne, f0 // Negate
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_n_ResL = FR_n_ResL, f1, FR_n_Temp
|
|
// Multiplying exp and sin result - low result obtained
|
|
nop.i 0
|
|
};;
|
|
|
|
.pred.rel "mutex",p12,p13
|
|
{ .mfi
|
|
nop.m 0
|
|
(p13) fma.s0 f8 = FR_n_ResH, f1, FR_n_ResL // For odd
|
|
nop.i 0
|
|
}
|
|
{ .mfb
|
|
nop.m 0
|
|
(p12) fms.s0 f8 = FR_n_ResH, f1, FR_n_ResL // For even
|
|
br.ret.sptk b0 // Exit for negative Stirling path //////////////////////
|
|
};;
|
|
|
|
|
|
//////////// 1 <= |X| < 13 path ////////////////////////////////////////////////
|
|
//------------------------------------------------------------------------------
|
|
.align 64
|
|
tgamma_lt_13:
|
|
{ .mfi
|
|
getf.sig GR_p_XN = FR_p_IXN // Get significand
|
|
fcvt.xf FR_p_XN = FR_p_IXN // xn = [x]
|
|
add GR_r_sin_Table2= 0x40, GR_r_sin_Table // Shifted table addr.
|
|
}
|
|
{ .mfi
|
|
ldfpd FR_p_0p5, FR_p_1p5 = [GR_c_Table], 16 // 0.5 & 1.5
|
|
fms.s1 FR_p_AbsXM1 = FR_p_AbsX, f1, f1 // X-1
|
|
add GR_p_Table2 = 0xB0, GR_p_Table
|
|
};;
|
|
|
|
{ .mfi
|
|
add GR_r_sin_Table = -16, GR_r_sin_Table // For compensation
|
|
fcvt.xf FR_r_XNS = FR_r_IXNS // Convert int repr to float
|
|
shr.u GR_p_X_Sgnd = GR_p_X_Sgnd, 59 // Get only 5 bit of signd
|
|
};;
|
|
|
|
{ .mfi
|
|
ldfpd FR_r_A2H,FR_r_A2L = [GR_r_sin_Table], 16 // Load A2
|
|
nop.f 0
|
|
add GR_p_Int = -2, GR_p_XN // int = int - 2
|
|
}
|
|
{ .mfi
|
|
ldfe FR_r_A6 = [GR_r_sin_Table2], 16
|
|
nop.f 0
|
|
cmp.gtu p11, p12 = 0x2, GR_p_XN // p11: x < 2 (splitted intervals),
|
|
// p12: x > 2 (base intervals)
|
|
};;
|
|
|
|
{ .mfi
|
|
ldfpd FR_r_A1H, FR_r_A1L = [GR_r_sin_Table], 16
|
|
nop.f 0
|
|
shr GR_p_Int = GR_p_Int, 1 // int/2
|
|
}
|
|
{ .mfi
|
|
ldfe FR_r_A5 = [GR_r_sin_Table2], 16
|
|
nop.f 0
|
|
(p11) cmp.gtu.unc p10, p11 = 0x1C, GR_p_X_Sgnd // sgnd(x) < 0.75
|
|
};;
|
|
|
|
{ .mfi
|
|
ldfe FR_r_A9 = [GR_r_sin_Table], 16
|
|
nop.f 0
|
|
shl GR_p_Offset = GR_p_Int, 4 // offset = int*16
|
|
}
|
|
{ .mfi
|
|
ldfe FR_r_A4 = [GR_r_sin_Table2], 16
|
|
nop.f 0
|
|
(p10) cmp.gtu.unc p9, p10 = 0x14, GR_p_X_Sgnd // sgnd(x) < 0.25
|
|
};;
|
|
|
|
|
|
{ .mfi
|
|
ldfe FR_r_A8 = [GR_r_sin_Table], 16
|
|
nop.f 0
|
|
(p12) tbit.nz.unc p13, p12 = GR_p_XN, 0x0 // p13: reccurent computations
|
|
// X is at [3;4], [5;6], [7;8]... interval
|
|
}
|
|
{ .mfi
|
|
ldfe FR_r_A3 = [GR_r_sin_Table2], 16
|
|
nop.f 0
|
|
shladd GR_p_Offset = GR_p_Int, 2, GR_p_Offset // +int*4
|
|
};;
|
|
|
|
.pred.rel "mutex",p9,p11
|
|
{ .mfi
|
|
add GR_p_Offset = GR_p_Int, GR_p_Offset
|
|
// +int, so offset = int*21
|
|
(p9) fms.s1 FR_p_XR = FR_p_AbsX, f1, f1 // r = x-1
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
ldfe FR_r_A7 = [GR_r_sin_Table], 16
|
|
(p11) fms.s1 FR_p_XR = FR_p_2, f1, FR_p_AbsX
|
|
// r = 2-x for 1.75 < x < 2
|
|
nop.i 0
|
|
};;
|
|
|
|
.pred.rel "mutex",p9,p10
|
|
.pred.rel "mutex",p10,p11
|
|
.pred.rel "mutex",p9,p11
|
|
{ .mfi
|
|
(p9) add GR_p_Offset = 126, r0 // 1.0 < x < 1.25 table
|
|
(p15) fcmp.eq.unc.s1 p7,p0 = FR_p_AbsX, FR_p_XN
|
|
// If arg is integer and negative - singularity branch
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
(p10) add GR_p_Offset = 147, r0 // 1.25 < x < 1.75 table
|
|
nop.f 0
|
|
(p11) add GR_p_Offset = 168, r0 // 1.75 < x < 2.0 table
|
|
};;
|
|
|
|
{ .mmf
|
|
shladd GR_p_Table = GR_p_Offset, 4, GR_p_Table
|
|
shladd GR_p_Table2 = GR_p_Offset, 4, GR_p_Table2
|
|
fma.s1 FR_r_XS = FR_r_AbsX , f1, FR_r_XNS // xs = x - [x]
|
|
};;
|
|
|
|
{ .mmb
|
|
ldfpd FR_p_A5H, FR_p_A5L = [GR_p_Table], 16
|
|
ldfpd FR_p_A2H, FR_p_A2L = [GR_p_Table2], 16
|
|
(p7) br.cond.spnt tgammal_singularity // Singularity for integer /////////////
|
|
// and negative argument ///////////////
|
|
};;
|
|
|
|
{ .mfi
|
|
ldfpd FR_p_A4H, FR_p_A4L = [GR_p_Table], 16
|
|
fma.s1 FR_p_XN = FR_p_XN, f1, FR_p_0p5 // xn = xn+0.5
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
ldfpd FR_p_A1H, FR_p_A1L = [GR_p_Table2], 16
|
|
(p10) fms.s1 FR_p_XR = FR_p_AbsX, f1, FR_p_1p5 // r = x - 1.5
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mmi
|
|
ldfpd FR_p_A3H, FR_p_A3L = [GR_p_Table], 16
|
|
ldfpd FR_p_A0H, FR_p_A0L = [GR_p_Table2], 16
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mmi
|
|
ldfe FR_p_A20 = [GR_p_Table], 16
|
|
ldfe FR_p_A12 = [GR_p_Table2], 16
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mmf
|
|
ldfe FR_p_A19 = [GR_p_Table], 16
|
|
ldfe FR_p_A11 = [GR_p_Table2], 16
|
|
fma.s1 FR_r_XS2 = FR_r_XS, FR_r_XS, f0 // xs2 = xs*xs
|
|
};;
|
|
|
|
{ .mmi
|
|
ldfe FR_p_A18 = [GR_p_Table], 16
|
|
ldfe FR_p_A10 = [GR_p_Table2], 16
|
|
nop.i 0
|
|
};;
|
|
|
|
.pred.rel "mutex",p12,p13
|
|
{ .mfi
|
|
ldfe FR_p_A17 = [GR_p_Table], 16
|
|
(p12) fms.s1 FR_p_XR = FR_p_AbsX, f1, FR_p_XN // r = x - xn
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
ldfe FR_p_A9 = [GR_p_Table2], 16
|
|
(p13) fms.s1 FR_p_XR = FR_p_AbsX, f1, FR_p_XN
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mmi
|
|
ldfe FR_p_A16 = [GR_p_Table], 16
|
|
ldfe FR_p_A8 = [GR_p_Table2], 16
|
|
(p9) cmp.eq p12, p0 = r0, r0 // clear p12
|
|
};;
|
|
|
|
{ .mmi
|
|
ldfe FR_p_A15 = [GR_p_Table], 16
|
|
ldfe FR_p_A7 = [GR_p_Table2], 16
|
|
(p10) cmp.eq p12, p0 = r0, r0 // clear p12
|
|
};;
|
|
|
|
{ .mfi
|
|
ldfe FR_p_A14 = [GR_p_Table], 16
|
|
fma.s1 FR_r_TH = FR_r_A2H, FR_r_XS2, f0 // sin for neg
|
|
(p11) cmp.eq p12, p0 = r0, r0 // clear p12
|
|
}
|
|
{ .mfi
|
|
ldfe FR_p_A6 = [GR_p_Table2], 16
|
|
fma.s1 FR_r_TL = FR_r_A2L, FR_r_XS2, f0 // sin for neg
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
ldfe FR_p_A13 = [GR_p_Table], 16
|
|
fms.s1 FR_r_XS2L = FR_r_XS, FR_r_XS, FR_r_XS2 // x2Lo part
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Temp5H = FR_p_A5H, FR_p_XR, f0 // A5H*r
|
|
// 'Low poly'
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_XR2 = FR_p_XR, FR_p_XR, f0 // r^2 = r*r
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fabs FR_r_XS = FR_r_XS // abs(xs)
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Temp2H = FR_p_A2H, FR_p_XR, f0 // A2H*r
|
|
// 'High poly'
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_r_TT = FR_r_A2H, FR_r_XS2, FR_r_TH // sin for neg
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_ResH = FR_r_TH, f1, FR_r_A1H // sin for neg
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_TL = FR_r_A2H, FR_r_XS2L, FR_r_TL // sin for neg
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_p_Temp5L = FR_p_A5H,FR_p_XR,FR_p_Temp5H //A5H*r delta
|
|
// 'Low poly'
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Poly5H = FR_p_Temp5H, f1, FR_p_A4H // A5H*r+A4H
|
|
// 'Low poly'
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_p_Temp2L = FR_p_A2H, FR_p_XR, FR_p_Temp2H//A2H*r delta
|
|
//'High poly'
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Poly2H = FR_p_Temp2H, f1, FR_p_A1H // A2H*r+A1H
|
|
//'High poly'
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_XR3 = FR_p_XR2, FR_p_XR, f0 // r^3 = r^2*r
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_p_XR2L = FR_p_XR, FR_p_XR, FR_p_XR2 // r^2 delta
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_A18 = FR_p_A19, FR_p_XR, FR_p_A18 // Poly tail
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_A14 = FR_p_A15, FR_p_XR, FR_p_A14 // Poly tail
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_XR4 = FR_p_XR2, FR_p_XR2, f0 // r^4 = r^2*r^2
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Temp5L = FR_p_A5L, FR_p_XR, FR_p_Temp5L// Low part
|
|
// of A5*r+A4
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_p_Poly5L = FR_p_A4H, f1, FR_p_Poly5H // Low part
|
|
// of A5*r+A4
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Temp4H = FR_p_Poly5H, FR_p_XR, f0 // (A5H*r+A4H)*r
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Temp2L = FR_p_A2L, FR_p_XR, FR_p_Temp2L // A2*r low
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_p_Poly2L = FR_p_A1H, f1, FR_p_Poly2H // High poly
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Temp1H = FR_p_Poly2H, FR_p_XR, f0 // High poly
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_p_XR3L = FR_p_XR2, FR_p_XR, FR_p_XR3 // x^3 delta
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_A16 = FR_p_A17, FR_p_XR, FR_p_A16 // Poly tail
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_r_ResL = FR_r_A1H, f1, FR_r_ResH // sin for neg
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_TL = FR_r_TL, f1, FR_r_TT // sin for neg
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Temp5L = FR_p_Temp5L, f1, FR_p_A4L // Low poly
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Poly5L = FR_p_Poly5L, f1, FR_p_Temp5H // Low poly
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_p_Temp4L = FR_p_Poly5H,FR_p_XR,FR_p_Temp4H //Low poly
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Poly4H = FR_p_Temp4H, f1, FR_p_A3H // Low poly
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Temp2L = FR_p_Temp2L, f1, FR_p_A1L // High poly
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Poly2L = FR_p_Poly2L, f1, FR_p_Temp2H // High poly
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_p_Temp1L = FR_p_Poly2H,FR_p_XR,FR_p_Temp1H //High poly
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Poly1H = FR_p_Temp1H, f1, FR_p_A0H // High poly
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_A12 = FR_p_A13, FR_p_XR, FR_p_A12 // Poly tail
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_XR3L = FR_p_XR2L, FR_p_XR, FR_p_XR3L // x^3 low
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Poly5L = FR_p_Poly5L, f1, FR_p_Temp5L // Low poly
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_A10 = FR_p_A11, FR_p_XR, FR_p_A10 // Poly tail
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_p_Poly4L = FR_p_A3H, f1, FR_p_Poly4H // Low poly
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_A6 = FR_p_A7, FR_p_XR, FR_p_A6 // Poly tail
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_A8 = FR_p_A9, FR_p_XR, FR_p_A8 // Poly tail
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_XR6 = FR_p_XR4, FR_p_XR2, f0 // Poly tail
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Poly2L = FR_p_Poly2L, f1, FR_p_Temp2L // High poly
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_p_Poly1L = FR_p_A0H, f1, FR_p_Poly1H // High poly
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_ResL = FR_r_ResL, f1, FR_r_TH // sin for neg
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_TT = FR_r_TL, f1, FR_r_A1L // sin for neg
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Temp4L = FR_p_Poly5L,FR_p_XR,FR_p_Temp4L // Low poly
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_A18 = FR_p_A20, FR_p_XR2, FR_p_A18 // Poly tail
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Poly4L = FR_p_Poly4L, f1, FR_p_Temp4H // Low poly
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_A14 = FR_p_A16, FR_p_XR2, FR_p_A14 // Poly tail
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_A6 = FR_p_A8, FR_p_XR2, FR_p_A6 // Poly tail
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_A10 = FR_p_A12, FR_p_XR2, FR_p_A10 // Poly tail
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Temp1L = FR_p_Poly2L,FR_p_XR,FR_p_Temp1L //High poly
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Poly1L = FR_p_Poly1L, f1, FR_p_Temp1H // High poly
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_ResL = FR_r_ResL, f1, FR_r_TT // sin for neg
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_TH = FR_r_ResH, FR_r_XS2, f0 // sin for neg
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Temp4L = FR_p_Temp4L, f1, FR_p_A3L // Low poly
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Poly3H = FR_p_Poly4H, FR_p_XR3, f0 // Low poly
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_A14 = FR_p_A18, FR_p_XR4, FR_p_A14 // Poly tail
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_XR8 = FR_p_XR4, FR_p_XR4, f0 // Poly tail
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_TL = FR_r_ResH, FR_r_XS2L, f0 // sin for neg
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Temp1L = FR_p_Temp1L, f1, FR_p_A0L // High poly
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_A6 = FR_p_A10, FR_p_XR4, FR_p_A6 // Poly tail
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_r_TT = FR_r_ResH, FR_r_XS2, FR_r_TH // sin for neg
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_Res3H = FR_r_TH, f1, f1 // sin for neg
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Poly4L = FR_p_Poly4L, f1, FR_p_Temp4L // Low poly
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Poly3L = FR_p_Poly4H, FR_p_XR3L, f0 // Low poly
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Poly0H = FR_p_Poly3H,f1,FR_p_Poly1H //Low & High add
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_A7 = FR_r_A8, FR_r_XS2, FR_r_A7 // sin for neg
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_TL = FR_r_ResL, FR_r_XS2, FR_r_TL // sin for neg
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_XS4 = FR_r_XS2, FR_r_XS2, f0 // sin for neg
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Poly1L = FR_p_Poly1L, f1, FR_p_Temp1L // High poly
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_PolyTail = FR_p_A14, FR_p_XR8, FR_p_A6 // Poly tail
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_r_Res3L = f1, f1, FR_r_Res3H // sin for neg
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_ResH = FR_r_Res3H, FR_r_XS, f0 // sin for neg
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_p_Temp0L = FR_p_Poly4H,FR_p_XR3,FR_p_Poly3H //Low poly
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Poly3L = FR_p_Poly4L,FR_p_XR3,FR_p_Poly3L //Low poly
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_p_Poly0L = FR_p_Poly1H,f1,FR_p_Poly0H //Low & High add
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
(p13) fma.s1 FR_p_OddPoly0H = FR_p_Poly0H, FR_p_AbsXM1, f0
|
|
// Reccurent computations - multiplying by X-1
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_TL = FR_r_TL, f1, FR_r_TT // sin for neg
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_A3 = FR_r_A4, FR_r_XS2, FR_r_A3 // sin for neg
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Poly1L = FR_p_PolyTail,FR_p_XR6,FR_p_Poly1L//High
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_A5 = FR_r_A6, FR_r_XS2, FR_r_A5 // sin for neg
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_Res3L = FR_r_Res3L, f1, FR_r_TH // sin for neg
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_r_ResL = FR_r_Res3H, FR_r_XS, FR_r_ResH//sin for neg
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Poly3L = FR_p_Poly3L, f1, FR_p_Temp0L // Low poly
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_A7 = FR_r_A9, FR_r_XS4, FR_r_A7 // sin for neg
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Poly0L = FR_p_Poly0L,f1,FR_p_Poly3H //Low & High add
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
(p13) fms.s1 FR_p_OddPoly0L = FR_p_Poly0H, FR_p_AbsXM1, FR_p_OddPoly0H
|
|
// Reccurent computations - multiplying by X-1 (low part)
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_A3 = FR_r_A5, FR_r_XS4, FR_r_A3 // sin for neg
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_XS7 = FR_r_XS4, FR_r_XS2, f0 // xs^6
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_Res3L = FR_r_Res3L, f1, FR_r_TL // sin for neg
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_XS8 = FR_r_XS4, FR_r_XS4, f0 // sin for neg
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Temp0H = FR_p_Poly3L,f1,FR_p_Poly1L //Low & High add
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_XS7 = FR_r_XS7, FR_r_XS, f0 // xs^7
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_ResL = FR_r_Res3L, FR_r_XS, FR_r_ResL//sin for neg
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_Tail = FR_r_A7, FR_r_XS8, FR_r_A3 // sin tail res
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Poly0L = FR_p_Poly0L,f1,FR_p_Temp0H //Low & High add
|
|
nop.i 0
|
|
};;
|
|
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_ResL = FR_r_Tail,FR_r_XS7,FR_r_ResL //sin for neg
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
(p13) fma.s1 FR_p_OddPoly0L = FR_p_Poly0L, FR_p_AbsXM1, FR_p_OddPoly0L
|
|
// Reccurent computations - multiplying by X-1 (low part)
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_TT = FR_r_ResL, FR_r_AbsX, f0 // X*sin
|
|
nop.i 0
|
|
};;
|
|
|
|
.pred.rel "mutex",p12,p13
|
|
{ .mfi
|
|
nop.m 0
|
|
(p12) fma.s0 f8 = FR_p_Poly0H, f1, FR_p_Poly0L // Even
|
|
nop.i 0
|
|
}
|
|
{ .mfb
|
|
nop.m 0
|
|
(p13) fma.s0 f8 = FR_p_OddPoly0H, f1, FR_p_OddPoly0L // Odd
|
|
(p14) br.ret.spnt b0 // Exit for 1 <= |X| < 13 path (positive arguments)/////
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
(p13) fma.s1 FR_p_Poly0H = FR_p_OddPoly0H, f1, f0
|
|
// Reccurent computations
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
(p13) fma.s1 FR_p_Poly0L = FR_p_OddPoly0L, f1, f0
|
|
// Reccurent computations
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_Res1H = FR_r_ResH, FR_r_AbsX, FR_r_TT // X*sin
|
|
(p11) cmp.eq p13, p12 = r0, r0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_r_Res1L = FR_r_ResH,FR_r_AbsX,FR_r_Res1H// X*sin
|
|
(p9) cmp.eq p13, p12 = r0, r0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_Res1L = FR_r_Res1L, f1, FR_r_TT // sin for neg
|
|
(p10) cmp.eq p13, p12 = r0, r0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_TL = FR_p_Poly0L, FR_r_Res1H, f0 // mult by sin
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_TL = FR_p_Poly0H,FR_r_Res1L,FR_r_TL//mult by sin
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_ResH = FR_p_Poly0H,FR_r_Res1H,FR_r_TL//mult by sin
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_r_ResL = FR_p_Poly0H,FR_r_Res1H,FR_r_ResH//sin mult
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
frcpa.s1 FR_r_Y0,p0 = f1,FR_r_ResH // y = frcpa(b)
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fneg FR_r_NegOne = f1 // Form -1.0
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_ResL = FR_r_ResL, f1, FR_r_TL //Low result of mult
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_Q0 = f1,FR_r_Y0,f0 // q = a*y
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fnma.s1 FR_r_E0 = FR_r_Y0,FR_r_ResH,f1 // e = 1-b*y
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_E2 = FR_r_E0,FR_r_E0,FR_r_E0 // e2 = e+e^2
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_E1 = FR_r_E0,FR_r_E0,f0 // e1 = e^2
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_Y1 = FR_r_Y0,FR_r_E2,FR_r_Y0 // y1 = y+y*e2
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_E3 = FR_r_E1,FR_r_E1,FR_r_E0 // e3 = e+e1^2
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_Y2 = FR_r_Y1,FR_r_E3,FR_r_Y0 // y2 = y+y1*e3
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fnma.s1 FR_r_R0 = FR_r_ResH,FR_r_Q0,f1 // r = a-b*q
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fnma.s1 FR_r_E4 = FR_r_ResH,FR_r_Y2,f1 // e4 = 1-b*y2
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_ZH = FR_r_R0,FR_r_Y2,FR_r_Q0 // x = q+r*y2
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_Y3 = FR_r_Y2,FR_r_E4,FR_r_Y2 // y3 = y2+y2*e4
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fnma.s1 FR_r_R1 = FR_r_ResH,FR_r_ZH,f1 // r1 = a-b*x
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fnma.s1 FR_r_R1 = FR_r_ResL,FR_r_ZH,FR_r_R1 // r1=r1-b_lo*X
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
(p12) fma.s1 FR_r_ZHN = FR_r_ZH,FR_r_NegOne, f0 // Negate for evens
|
|
nop.i 0
|
|
};;
|
|
|
|
.pred.rel "mutex",p13,p12
|
|
{ .mfi
|
|
nop.m 0
|
|
(p13) fma.s0 f8 = FR_r_R1,FR_r_Y3,FR_r_ZH // Final result
|
|
nop.i 0
|
|
}
|
|
{ .mfb
|
|
nop.m 0
|
|
(p12) fnma.s0 f8 = FR_r_R1,FR_r_Y3,FR_r_ZHN // Final result
|
|
br.ret.sptk b0 // Exit for 1 <= |X| < 13 path (negative arguments)//////
|
|
};;
|
|
|
|
|
|
//////////// |X| < 1 path /////////////////////////////////////////////////////
|
|
//------------------------------------------------------------------------------
|
|
.align 64
|
|
tgamma_lt_1:
|
|
{ .mfi
|
|
getf.exp GR_p_Exp = FR_p_AbsX // exp of abs X
|
|
fma.s1 FR_z_Q0 = f1,FR_z_Y0,f0 // q = a*y
|
|
add GR_r_sin_Table2= 0x50, GR_r_sin_Table
|
|
}
|
|
{ .mfi
|
|
ldfpd FR_p_0p5, FR_p_1p5 = [GR_c_Table], 16
|
|
fnma.s1 FR_z_E0 = FR_z_Y0,f8,f1 // e = 1-b*y
|
|
add GR_p_Table2 = 0xB0, GR_p_Table
|
|
};;
|
|
|
|
{ .mfi
|
|
ldfd FR_p_0p25 = [GR_c_Table]
|
|
fcvt.xf FR_r_XNS = FR_r_IXNS // Convert int repr to float
|
|
shr.u GR_p_X_Sgnd = GR_p_X_Sgnd, 60
|
|
// Obtain only 4 bits of significand
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
nop.f 0
|
|
add GR_p_Bias = 0xffff, r0 // Set bias
|
|
};;
|
|
|
|
{ .mfi
|
|
ldfpd FR_r_A2H, FR_r_A2L = [GR_r_sin_Table], 16
|
|
nop.f 0
|
|
shl GR_p_XN = GR_p_Exp, 4
|
|
// Shift exp to 4 bits left to set place for significand
|
|
}
|
|
{ .mlx
|
|
ldfe FR_r_A6 = [GR_r_sin_Table2], 16
|
|
movl GR_p_0p75 = 0xfffec // 0.75
|
|
};;
|
|
|
|
{ .mfi
|
|
ldfpd FR_r_A1H, FR_r_A1L = [GR_r_sin_Table], 16
|
|
nop.f 0
|
|
or GR_p_XN = GR_p_XN, GR_p_X_Sgnd
|
|
// Combine exp with 4 high bits of significand
|
|
}
|
|
{ .mfi
|
|
ldfe FR_r_A5 = [GR_r_sin_Table2], 16
|
|
nop.f 0
|
|
sub GR_p_Exp = GR_p_Exp, GR_p_Bias // Unbiased exp
|
|
};;
|
|
|
|
{ .mmi
|
|
ldfe FR_r_A9 = [GR_r_sin_Table], 16
|
|
ldfe FR_r_A4 = [GR_r_sin_Table2], 16
|
|
cmp.gtu.unc p10, p11 = GR_p_0p75, GR_p_XN // sgnd(x) < 0.75
|
|
};;
|
|
|
|
{ .mfi
|
|
ldfe FR_r_A8 = [GR_r_sin_Table], 16
|
|
fma.s1 FR_z_E2 = FR_z_E0,FR_z_E0,FR_z_E0 // e2 = e+e^2
|
|
(p10) cmp.gt.unc p9, p10 = -2, GR_p_Exp // x < 0.25
|
|
}
|
|
{ .mfi
|
|
ldfe FR_r_A3 = [GR_r_sin_Table2], 16
|
|
fma.s1 FR_z_E1 = FR_z_E0,FR_z_E0,f0 // e1 = e^2
|
|
(p11) add GR_p_Offset = 168, r0 // [0.75;1] interval
|
|
};;
|
|
|
|
{ .mmi
|
|
(p10) add GR_p_Offset = 147, r0 // [0.25;0.75] interval
|
|
ldfe FR_r_A7 = [GR_r_sin_Table], 16
|
|
(p9) cmp.gt.unc p8, p9 = -3, GR_p_Exp // x < 0.125
|
|
};;
|
|
|
|
.pred.rel "mutex",p9,p8
|
|
{ .mmi
|
|
(p9) add GR_p_Offset = 126, r0 // [0.125;0.25] interval
|
|
(p8) add GR_p_Offset = 189, r0 // [0.;0.125] interval
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mmf
|
|
shladd GR_p_Table = GR_p_Offset, 4, GR_p_Table //Make addresses
|
|
shladd GR_p_Table2 = GR_p_Offset, 4, GR_p_Table2
|
|
fma.s1 FR_r_XS = FR_r_AbsX , f1, FR_r_XNS // xs = |x|-[x]
|
|
};;
|
|
|
|
.pred.rel "mutex",p8,p11
|
|
{ .mfi
|
|
ldfpd FR_p_A5H, FR_p_A5L = [GR_p_Table], 16
|
|
(p11) fms.s1 FR_p_XR = f1, f1, FR_p_AbsX // r = 1 - |x|
|
|
// for [0.75;1] interval
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
ldfpd FR_p_A2H, FR_p_A2L = [GR_p_Table2], 16
|
|
(p8) fms.s1 FR_p_XR = FR_p_AbsX, f1, f0 // r = |x|
|
|
// for [0.;0.125] interval
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
ldfpd FR_p_A4H, FR_p_A4L = [GR_p_Table], 16
|
|
fma.s1 FR_z_Y1 = FR_z_Y0,FR_z_E2,FR_z_Y0 // y1 = y+y*e2
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
ldfpd FR_p_A1H, FR_p_A1L = [GR_p_Table2], 16
|
|
fma.s1 FR_z_E3 = FR_z_E1,FR_z_E1,FR_z_E0 // e3 = e+e1^2
|
|
nop.i 0
|
|
};;
|
|
|
|
.pred.rel "mutex",p9,p10
|
|
{ .mfi
|
|
ldfpd FR_p_A3H, FR_p_A3L = [GR_p_Table], 16
|
|
(p9) fms.s1 FR_p_XR = FR_p_AbsX, f1, f0 // r = |x|
|
|
// for [0.125;0.25] interval
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
ldfpd FR_p_A0H, FR_p_A0L = [GR_p_Table2], 16
|
|
(p10) fms.s1 FR_p_XR = FR_p_AbsX, f1, FR_p_0p5 // r = |x| - 0.5
|
|
// for [0.25;0.75] interval
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mmi
|
|
ldfe FR_p_A20 = [GR_p_Table], 16
|
|
ldfe FR_p_A12 = [GR_p_Table2], 16
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
ldfe FR_p_A19 = [GR_p_Table], 16
|
|
fma.s1 FR_r_XS2 = FR_r_XS, FR_r_XS, f0 // xs^2
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
ldfe FR_p_A11 = [GR_p_Table2], 16
|
|
nop.f 0
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mmi
|
|
ldfe FR_p_A18 = [GR_p_Table], 16
|
|
ldfe FR_p_A10 = [GR_p_Table2], 16
|
|
nop.i 0
|
|
};;
|
|
|
|
.pred.rel "mutex",p12,p13
|
|
{ .mfi
|
|
ldfe FR_p_A17 = [GR_p_Table], 16
|
|
fma.s1 FR_z_Y2 = FR_z_Y1,FR_z_E3,FR_z_Y0 // y2 = y+y1*e3
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
ldfe FR_p_A9 = [GR_p_Table2], 16
|
|
fnma.s1 FR_z_R0 = f8,FR_z_Q0,f1 // r = a-b*q
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mmi
|
|
ldfe FR_p_A16 = [GR_p_Table], 16
|
|
ldfe FR_p_A8 = [GR_p_Table2], 16
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mmi
|
|
ldfe FR_p_A15 = [GR_p_Table], 16
|
|
ldfe FR_p_A7 = [GR_p_Table2], 16
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
ldfe FR_p_A14 = [GR_p_Table], 16
|
|
fma.s1 FR_r_TH = FR_r_A2H, FR_r_XS2, f0 // neg sin
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
ldfe FR_p_A6 = [GR_p_Table2], 16
|
|
fma.s1 FR_r_TL = FR_r_A2L, FR_r_XS2, f0 // neg sin
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
ldfe FR_p_A13 = [GR_p_Table], 16
|
|
fms.s1 FR_r_XS2L = FR_r_XS, FR_r_XS, FR_r_XS2 // xs^2 delta
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Temp5H = FR_p_A5H, FR_p_XR, f0 // Low poly
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_XR2 = FR_p_XR, FR_p_XR, f0 // poly tail
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fabs FR_r_XS = FR_r_XS // Absolute value of xs
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Temp2H = FR_p_A2H, FR_p_XR, f0 // High poly
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fnma.s1 FR_z_E4 = f8,FR_z_Y2,f1 // e4 = 1-b*y2
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_z_ZH = FR_z_R0,FR_z_Y2,FR_z_Q0 // 1/x = q+r*y2
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_r_TT = FR_r_A2H, FR_r_XS2, FR_r_TH // neg sin
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_ResH = FR_r_TH, f1, FR_r_A1H // neg sin
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_TL = FR_r_A2H, FR_r_XS2L, FR_r_TL // neg sin
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_p_Temp5L = FR_p_A5H, FR_p_XR, FR_p_Temp5H // Low poly
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Poly5H = FR_p_Temp5H, f1, FR_p_A4H // Low poly
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_p_Temp2L = FR_p_A2H, FR_p_XR, FR_p_Temp2H // High poly
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Poly2H = FR_p_Temp2H, f1, FR_p_A1H // High poly
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_XR3 = FR_p_XR2, FR_p_XR, f0 // r^3
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_p_XR2L = FR_p_XR, FR_p_XR, FR_p_XR2 // r^2 delta
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_A18 = FR_p_A19, FR_p_XR, FR_p_A18 // poly tail
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_A14 = FR_p_A15, FR_p_XR, FR_p_A14 // poly tail
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_XR4 = FR_p_XR2, FR_p_XR2, f0 // poly tail
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_z_Y3 = FR_z_Y2,FR_z_E4,FR_z_Y2 // y3 = y2+y2*e4
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Temp5L = FR_p_A5L, FR_p_XR, FR_p_Temp5L // Low poly
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_p_Poly5L = FR_p_A4H, f1, FR_p_Poly5H // Low poly
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Temp4H = FR_p_Poly5H, FR_p_XR, f0 // Low poly
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Temp2L = FR_p_A2L, FR_p_XR, FR_p_Temp2L // High poly
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_p_Poly2L = FR_p_A1H, f1, FR_p_Poly2H // High poly
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Temp1H = FR_p_Poly2H, FR_p_XR, f0 // High poly
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_p_XR3L = FR_p_XR2, FR_p_XR, FR_p_XR3 // x^3 delta
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_A16 = FR_p_A17, FR_p_XR, FR_p_A16 //poly tail
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_r_ResL = FR_r_A1H, f1, FR_r_ResH // neg sin
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_TL = FR_r_TL, f1, FR_r_TT // neg sin
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Temp5L = FR_p_Temp5L, f1, FR_p_A4L // Low poly
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Poly5L = FR_p_Poly5L, f1, FR_p_Temp5H //Low poly
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_p_Temp4L = FR_p_Poly5H, FR_p_XR, FR_p_Temp4H//Low poly
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Poly4H = FR_p_Temp4H, f1, FR_p_A3H // Low poly
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Temp2L = FR_p_Temp2L, f1, FR_p_A1L // High poly
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Poly2L = FR_p_Poly2L, f1, FR_p_Temp2H // High poly
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_p_Temp1L = FR_p_Poly2H,FR_p_XR,FR_p_Temp1H //High poly
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Poly1H = FR_p_Temp1H, f1, FR_p_A0H // High poly
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_A12 = FR_p_A13, FR_p_XR, FR_p_A12 // poly tail
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_XR3L = FR_p_XR2L, FR_p_XR, FR_p_XR3L // x^3 low
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Poly5L = FR_p_Poly5L, f1, FR_p_Temp5L //Low poly
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_A10 = FR_p_A11, FR_p_XR, FR_p_A10 //poly tail
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_p_Poly4L = FR_p_A3H, f1, FR_p_Poly4H /// Low poly
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_A6 = FR_p_A7, FR_p_XR, FR_p_A6 // poly tail
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_A8 = FR_p_A9, FR_p_XR, FR_p_A8 // poly tail
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_XR6 = FR_p_XR4, FR_p_XR2, f0 // r^6
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Poly2L = FR_p_Poly2L, f1, FR_p_Temp2L // High poly
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_p_Poly1L = FR_p_A0H, f1, FR_p_Poly1H // High poly
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_ResL = FR_r_ResL, f1, FR_r_TH // neg sin
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_TT = FR_r_TL, f1, FR_r_A1L // neg sin
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Temp4L = FR_p_Poly5L,FR_p_XR,FR_p_Temp4L //Low poly
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_A18 = FR_p_A20, FR_p_XR2, FR_p_A18 // poly tail
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Poly4L = FR_p_Poly4L, f1, FR_p_Temp4H // Low poly
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_A14 = FR_p_A16, FR_p_XR2, FR_p_A14 // poly tail
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_A6 = FR_p_A8, FR_p_XR2, FR_p_A6 // poly tail
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_A10 = FR_p_A12, FR_p_XR2, FR_p_A10 // poly tail
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Temp1L = FR_p_Poly2L,FR_p_XR,FR_p_Temp1L //High poly
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Poly1L = FR_p_Poly1L, f1, FR_p_Temp1H // High poly
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_ResL = FR_r_ResL, f1, FR_r_TT // neg sin
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_TH = FR_r_ResH, FR_r_XS2, f0 // neg sin
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Temp4L = FR_p_Temp4L, f1, FR_p_A3L // Low poly
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Poly3H = FR_p_Poly4H, FR_p_XR3, f0 // Low poly
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_A14 = FR_p_A18, FR_p_XR4, FR_p_A14 // poly tail
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_XR8 = FR_p_XR4, FR_p_XR4, f0 // r^8
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_TL = FR_r_ResH, FR_r_XS2L, f0 // neg sin
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fnma.s1 FR_z_R1 = f8,FR_z_ZH,f1 // r1 = a-b*x
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Temp1L = FR_p_Temp1L, f1, FR_p_A0L // High poly
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_A6 = FR_p_A10, FR_p_XR4, FR_p_A6 // poly tail
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_r_TT = FR_r_ResH, FR_r_XS2, FR_r_TH // neg sin
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_Res3H = FR_r_TH, f1, f1 // neg sin
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Poly4L = FR_p_Poly4L, f1, FR_p_Temp4L // Low poly
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Poly3L = FR_p_Poly4H, FR_p_XR3L, f0 // Low poly
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Poly0H = FR_p_Poly3H, f1, FR_p_Poly1H // Result
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_A7 = FR_r_A8, FR_r_XS2, FR_r_A7 // neg sin
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_TL = FR_r_ResL, FR_r_XS2, FR_r_TL // neg sin
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_XS4 = FR_r_XS2, FR_r_XS2, f0 // xs^4
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Poly1L = FR_p_Poly1L, f1, FR_p_Temp1L // High poly
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_PolyTail = FR_p_A14, FR_p_XR8, FR_p_A6 // poly tail
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_r_Res3L = f1, f1, FR_r_Res3H // neg sin
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_ResH = FR_r_Res3H, FR_r_XS, f0 // neg sin
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_p_Temp0L = FR_p_Poly4H,FR_p_XR3,FR_p_Poly3H //Low poly
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Poly3L = FR_p_Poly4L,FR_p_XR3,FR_p_Poly3L //Low poly
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_p_Poly0L = FR_p_Poly1H, f1, FR_p_Poly0H // Result
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_z_ZL = FR_z_R1,FR_z_Y3, f0 // x_lo = r1*y3
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_TL = FR_r_TL, f1, FR_r_TT // neg sin
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_A3 = FR_r_A4, FR_r_XS2, FR_r_A3 /// neg sin
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Poly1L = FR_p_PolyTail,FR_p_XR6,FR_p_Poly1L // High
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_A5 = FR_r_A6, FR_r_XS2, FR_r_A5 // neg sin
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_Res3L = FR_r_Res3L, f1, FR_r_TH // neg sin
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_r_ResL = FR_r_Res3H, FR_r_XS, FR_r_ResH // neg sin
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Poly3L = FR_p_Poly3L, f1, FR_p_Temp0L // Low poly
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_A7 = FR_r_A9, FR_r_XS4, FR_r_A7 // neg sin
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Poly0L = FR_p_Poly0L, f1, FR_p_Poly3H // result
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
(p14) fma.s1 f8 = FR_p_Poly0H, FR_z_ZH, f0 // z*poly
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Temp1L = FR_p_Poly0H, FR_z_ZL, f0 // z*poly low
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_A3 = FR_r_A5, FR_r_XS4, FR_r_A3 // sin tail
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_XS7 = FR_r_XS4, FR_r_XS2, f0 // xs^6
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_Res3L = FR_r_Res3L, f1, FR_r_TL // sin low
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_XS8 = FR_r_XS4, FR_r_XS4, f0 // xs^8
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Temp0H = FR_p_Poly3L, f1, FR_p_Poly1L // result
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
(p14) fms.s1 FR_p_Temp1H = FR_p_Poly0H, FR_z_ZH, f8 // hi result
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_XS7 = FR_r_XS7, FR_r_XS, f0 // xs^7
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_ResL = FR_r_Res3L, FR_r_XS, FR_r_ResL // lo result
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_Tail = FR_r_A7, FR_r_XS8, FR_r_A3 // tail result
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_p_Poly0L = FR_p_Poly0L, f1, FR_p_Temp0H // lo result
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_ResL = FR_r_Tail, FR_r_XS7, FR_r_ResL // lo result
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
(p14) fma.s1 FR_p_Temp1L = FR_p_Poly0L,FR_z_ZH,FR_p_Temp1L //hi result
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_TT = FR_r_ResL, f1, f0 // for low result
|
|
nop.i 0
|
|
};;
|
|
|
|
.pred.rel "mutex",p12,p13
|
|
{ .mfi
|
|
nop.m 0
|
|
(p14) fma.s1 FR_p_Temp1L = FR_p_Temp1L, f1, FR_p_Temp1H // for lo res
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
(p10) cmp.eq p13, p12 = r0, r0 // set p13, clear p12
|
|
fma.s1 FR_r_Res1H = FR_r_ResH, f1, FR_r_TT // hi res
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfb
|
|
(p9) cmp.eq p13, p12 = r0, r0 // set p13, clear p12
|
|
(p14) fma.s0 f8 = f8, f1, FR_p_Temp1L // Final result
|
|
(p14) br.ret.spnt b0 // Exit for 0 < |X| < 1 path (positive arguments)///////
|
|
};;
|
|
|
|
{ .mfi
|
|
(p11) cmp.eq p13, p12 = r0, r0 // set p13, clear p12
|
|
fms.s1 FR_r_Res1L = FR_r_ResH, f1, FR_r_Res1H // Low sin result
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_Res1L = FR_r_Res1L, f1, FR_r_TT // Low sin result
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_TL = FR_p_Poly0L,FR_r_Res1H,f0 //Low sin result
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_TL = FR_p_Poly0H, FR_r_Res1L, FR_r_TL //Low sin
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_ResH = FR_p_Poly0H, FR_r_Res1H, FR_r_TL //High sin
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fms.s1 FR_r_ResL = FR_p_Poly0H,FR_r_Res1H,FR_r_ResH //Low res
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
frcpa.s1 FR_r_Y0,p0 = f1,FR_r_ResH // y = frcpa(b)
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fneg FR_r_NegOne = f1 // Construct -1.0
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_ResL = FR_r_ResL, f1, FR_r_TL // low sin
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_Q0 = f1,FR_r_Y0,f0 // q = a*y
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fnma.s1 FR_r_E0 = FR_r_Y0,FR_r_ResH,f1 // e = 1-b*y
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_E2 = FR_r_E0,FR_r_E0,FR_r_E0 // e2 = e+e^2
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_E1 = FR_r_E0,FR_r_E0,f0 // e1 = e^2
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_Y1 = FR_r_Y0,FR_r_E2,FR_r_Y0 // y1 = y+y*e2
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_E3 = FR_r_E1,FR_r_E1,FR_r_E0 // e3 = e+e1^2
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_Y2 = FR_r_Y1,FR_r_E3,FR_r_Y0 // y2 = y+y1*e3
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fnma.s1 FR_r_R0 = FR_r_ResH,FR_r_Q0,f1 // r = a-b*q
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fnma.s1 FR_r_E4 = FR_r_ResH,FR_r_Y2,f1 // e4 = 1-b*y2
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_ZH = FR_r_R0,FR_r_Y2,FR_r_Q0 // x = q+r*y2
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_Y3 = FR_r_Y2,FR_r_E4,FR_r_Y2 // y3 = y2+y2*e4
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fnma.s1 FR_r_R1 = FR_r_ResH,FR_r_ZH,f1 // r1 = a-b*x
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
fnma.s1 FR_r_R1 = FR_r_ResL,FR_r_ZH,FR_r_R1 // r1=r1 - b_lo*X
|
|
nop.i 0
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fma.s1 FR_r_ZHN = FR_r_ZH,FR_r_NegOne, f0 // Negate
|
|
nop.i 0
|
|
};;
|
|
|
|
.pred.rel "mutex",p13,p12
|
|
{ .mfb
|
|
nop.m 0
|
|
fnma.s0 f8 = FR_r_R1,FR_r_Y3,FR_r_ZHN // Result for neg
|
|
br.ret.sptk b0 // Exit for 0 < |X| < 1 path (negative arguments)//////
|
|
};;
|
|
|
|
|
|
|
|
|
|
// SPECIALS (x for natval, nan, +/-inf or +/-0) ///////////////////////////////
|
|
//------------------------------------------------------------------------------
|
|
.align 32
|
|
tgammal_spec:
|
|
{ .mlx
|
|
nop.m 0
|
|
movl GR_DenOverflow = 0x2000000000000001
|
|
}
|
|
{ .mfi
|
|
nop.m 0
|
|
fclass.m p9,p0 = f8,0xB // +/-denormals
|
|
nop.i 0
|
|
};;
|
|
{ .mfi
|
|
nop.m 0
|
|
fclass.m p6,p0 = f8,0x1E1 // Test x for natval, nan, +inf
|
|
nop.i 0
|
|
};;
|
|
{ .mfi
|
|
nop.m 0
|
|
fclass.m p7,p8 = f8,0x7 // +/-0
|
|
nop.i 0
|
|
}
|
|
|
|
{ .mfi
|
|
(p9) cmp.ltu.unc p10,p11 = GR_l_signif_Z, GR_DenOverflow
|
|
(p9) fnorm.s0 f8 = f8
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfb
|
|
nop.m 0
|
|
(p9) fcvt.fx.trunc.s1 FR_n_IXN = FR_l_AbsX // Round by truncate
|
|
(p11) br.cond.sptk tgamma_lt_1 // Return to gamma ('good' denormal)////////////
|
|
};;
|
|
|
|
{ .mfb
|
|
nop.m 0
|
|
nop.f 0
|
|
(p10) br.cond.spnt tgammal_overflow // "Bad" denormal - overflow! /////////////
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
mov FR_X = f8 // for error handler
|
|
nop.i 0
|
|
}
|
|
{ .mfb
|
|
nop.m 0
|
|
(p6) fma.s0 f8 = f8,f1,f8 // res = x + x
|
|
(p6) br.ret.spnt b0 // Exit for NAN, INF and NatVals ////////////////////////
|
|
};;
|
|
.pred.rel "mutex",p7,p8
|
|
{ .mfi
|
|
(p7) mov GR_Parameter_TAG = 256 // negative
|
|
(p7) frcpa.s0 f8,p0 = f1,f8 // Raise V flag
|
|
nop.i 0
|
|
}
|
|
{ .mfb
|
|
nop.m 0
|
|
nop.f 0
|
|
(p8) br.cond.spnt tgammal_singularity // Branch for +ZERO ////////////////////
|
|
};;
|
|
|
|
{ .mfb
|
|
nop.m 0
|
|
nop.f 0
|
|
br.cond.spnt tgammal_libm_err // Branch for -ZERO ///////////////////////
|
|
};;
|
|
|
|
|
|
|
|
|
|
// SINGULARITY (x is negative integer or 0) ////////////////////////////////////
|
|
//------------------------------------------------------------------------------
|
|
.align 32
|
|
tgammal_singularity:
|
|
{ .mfi
|
|
nop.m 0
|
|
mov FR_X = f8 // For error handler
|
|
mov GR_Parameter_TAG = 256 // negative
|
|
}
|
|
{ .mfb
|
|
nop.m 0
|
|
frcpa.s0 f8,p0 = f0,f0 // Raise V flag
|
|
br.cond.sptk tgammal_libm_err // Call error handler /////////////////////
|
|
// with singularity error /////////////////
|
|
};;
|
|
|
|
|
|
|
|
|
|
// OVERFLOW (result is too big and cannot be represented by normal value) //////
|
|
// ( X > 1755.54 and for denormals with abs value less than 0x2000000000000001 )
|
|
//------------------------------------------------------------------------------
|
|
.align 32
|
|
tgammal_overflow:
|
|
{ .mfi
|
|
addl r8 = 0x1FFFE, r0 // Exp of INF
|
|
fcmp.lt.s1 p15,p14 = f8,f0 // p14 - pos arg, p15 - neg arg
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
setf.exp f9 = r8
|
|
mov FR_X = f8 // For error handler
|
|
mov GR_Parameter_TAG = 255 // overflow
|
|
};;
|
|
|
|
.pred.rel "mutex",p14,p15
|
|
{ .mfi
|
|
nop.m 0
|
|
(p14) fma.s0 f8 = f9,f9,f0 // Set I,O and +INF result
|
|
nop.i 0
|
|
}
|
|
{ .mfb
|
|
nop.m 0
|
|
(p15) fnma.s0 f8 = f9,f9,f0 // Set I,O and -INF result
|
|
br.cond.sptk tgammal_libm_err // Call error handler /////////////////////
|
|
// with overflow error ////////////////////
|
|
};;
|
|
|
|
|
|
|
|
|
|
|
|
// UNDERFLOW (x is negative noninteger with big absolute value) ////////////////
|
|
//------------------------------------------------------------------------------
|
|
.align 32
|
|
tgammal_underflow:
|
|
{ .mfi
|
|
nop.m 0
|
|
fcvt.fx.trunc.s1 FR_u_IXN = f8 // Convert arg to int repres. in FR
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mmi
|
|
getf.sig GR_u_XN = FR_u_IXN
|
|
mov r11 = 0x00001
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
setf.exp f9 = r11
|
|
nop.f 0
|
|
nop.i 0
|
|
};;
|
|
|
|
{ .mfi
|
|
nop.m 0
|
|
nop.f 0
|
|
tbit.z p6,p7 = GR_u_XN,0 // even or odd
|
|
};;
|
|
|
|
.pred.rel "mutex",p6,p7
|
|
{ .mfi
|
|
nop.m 0
|
|
(p6) fms.s0 f8 = f9,f9,f9 // for negatives
|
|
nop.i 0
|
|
}
|
|
{ .mfb
|
|
nop.m 0
|
|
(p7) fma.s0 f8 = f9,f9,f9 // for positives
|
|
br.ret.sptk b0 // Exit for underflow path //////////////////////////////
|
|
};;
|
|
|
|
|
|
GLOBAL_LIBM_END(tgammal)
|
|
libm_alias_ldouble_other (tgamma, tgamma)
|
|
|
|
|
|
|
|
|
|
////////////////// Tgammal error handler ///////////////////////////////////////
|
|
//------------------------------------------------------------------------------
|
|
LOCAL_LIBM_ENTRY(__libm_error_region)
|
|
tgammal_libm_err:
|
|
.prologue
|
|
{ .mfi
|
|
add GR_Parameter_Y=-32,sp // Parameter 2 value
|
|
nop.f 0
|
|
.save ar.pfs,GR_SAVE_PFS
|
|
mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
|
|
}
|
|
{ .mfi
|
|
.fframe 64
|
|
add sp=-64,sp // Create new stack
|
|
nop.f 0
|
|
mov GR_SAVE_GP=gp // Save gp
|
|
};;
|
|
{ .mmi
|
|
stfe [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack
|
|
add GR_Parameter_X = 16,sp // Parameter 1 address
|
|
.save b0, GR_SAVE_B0
|
|
mov GR_SAVE_B0=b0 // Save b0
|
|
};;
|
|
.body
|
|
{ .mib
|
|
stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
|
|
add GR_Parameter_RESULT = 0,GR_Parameter_Y
|
|
nop.b 0 // Parameter 3 address
|
|
}
|
|
{ .mib
|
|
stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
|
|
add GR_Parameter_Y = -16,GR_Parameter_Y
|
|
br.call.sptk b0=__libm_error_support# // Call error handling function
|
|
};;
|
|
{ .mmi
|
|
nop.m 999
|
|
nop.m 999
|
|
add GR_Parameter_RESULT = 48,sp
|
|
};;
|
|
{ .mmi
|
|
ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
|
|
.restore sp
|
|
add sp = 64,sp // Restore stack pointer
|
|
mov b0 = GR_SAVE_B0 // Restore return address
|
|
};;
|
|
{ .mib
|
|
mov gp = GR_SAVE_GP // Restore gp
|
|
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
|
|
br.ret.sptk b0 // Return
|
|
};;
|
|
|
|
LOCAL_LIBM_END(__libm_error_region#)
|
|
|
|
.type __libm_error_support#,@function
|
|
.global __libm_error_support#
|