224#include "defines_ooura.h"
226#pragma clang diagnostic push
227#pragma clang diagnostic ignored "-Wunused-private-field"
236 double* SinusTable[18];
237 double* CosinusTable[18];
240 __inline
void cftbsub(
int n,
float* a) {
252#ifdef USE_CDFT_THREADS
253 void cftrec4_th(
int n,
float* a);
259#ifdef USE_CDFT_THREADS
260 if (n > CDFT_THREADS_BEGIN_N) {
266 }
else if (n > 128) {
272 }
else if (n == 32) {
286 __inline
void cdft(
int n,
int isgn,
float* a) {
297 __inline
void rdft(
int n,
int isgn,
float* a) {
315 a[1] = 0.5 * (a[0] - a[1]);
326 __inline
void ddct(
int n,
int isgn,
float* a) {
338 for (j = n - 2; j >= 2; j -= 2) {
339 a[j + 1] = a[j] - a[j - 1];
365 for (j = 2; j < n; j += 2) {
366 a[j - 1] = a[j] - a[j + 1];
373 __inline
void ddst(
int n,
int isgn,
float* a) {
385 for (j = n - 2; j >= 2; j -= 2) {
386 a[j + 1] = -a[j] - a[j - 1];
412 for (j = 2; j < n; j += 2) {
413 a[j - 1] = -a[j] - a[j + 1];
420 __inline
void dfct(
int n,
float* a) {
424 float xr, xi, yr, yi, an;
427 for (j = 0; j < m; j++) {
444 for (j = 1; j < mh; j++) {
469 __inline
void dfst(
int n,
float* a) {
473 float xr, xi, yr, yi;
476 for (j = 1; j < m; j++) {
489 for (j = 1; j < mh; j++) {
516#define M_PI_2 1.570796326794896619231321691639751442098584699687
519#define WR5000 0.707106781186547524400844362104849039284835937688
522#define WR2500 0.923879532511286756128183189396788286822416625863
525#define WI2500 0.382683432365089771728459984030398866761344562485
528#define WR1250 0.980785280403230449126182236134239036973933730893
531#define WI1250 0.195090322016128267848284868477022240927691617751
534#define WR3750 0.831469612302545237078788377617905756738560811987
537#define WI3750 0.555570233019602224742830813948532874374937190754
540#ifdef USE_CDFT_PTHREADS
541#define USE_CDFT_THREADS
542#ifndef CDFT_THREADS_BEGIN_N
543#define CDFT_THREADS_BEGIN_N 8192
545#ifndef CDFT_4THREADS_BEGIN_N
546#define CDFT_4THREADS_BEGIN_N 65536
551#define cdft_thread_t pthread_t
552#define cdft_thread_create(thp, func, argp) \
554 if (pthread_create(thp, NULL, func, (void*)argp) != 0) { \
555 fprintf(stderr, "cdft thread error\n"); \
559#define cdft_thread_wait(th) \
561 if (pthread_join(th, NULL) != 0) { \
562 fprintf(stderr, "cdft thread error\n"); \
568#ifdef USE_CDFT_WINTHREADS
569#define USE_CDFT_THREADS
570#ifndef CDFT_THREADS_BEGIN_N
571#define CDFT_THREADS_BEGIN_N 32768
573#ifndef CDFT_4THREADS_BEGIN_N
574#define CDFT_4THREADS_BEGIN_N 524288
579#define cdft_thread_t HANDLE
580#define cdft_thread_create(thp, func, argp) \
583 *(thp) = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)func, (LPVOID)argp, \
586 fprintf(stderr, "cdft thread error\n"); \
590#define cdft_thread_wait(th) \
592 WaitForSingleObject(th, INFINITE); \
598#define CDFT_LOOP_DIV 32
602#define RDFT_LOOP_DIV 64
606#define DCST_LOOP_DIV 64
609 __inline
void cftfsub(
int n,
float* a) {
621#ifdef USE_CDFT_THREADS
622 void cftrec4_th(
int n,
float* a);
628#ifdef USE_CDFT_THREADS
629 if (n > CDFT_THREADS_BEGIN_N) {
635 }
else if (n > 128) {
641 }
else if (n == 32) {
655 __inline
void bitrv2(
int n,
float* a) {
656 int j0, k0, j1, k1, l, m, i, j, k, nh;
657 float xr, xi, yr, yi;
660 for (l = n >> 2; l > 8; l >>= 2) {
666 for (k0 = 0; k0 < m; k0 += 4) {
668 for (j = j0; j < j0 + k0; j += 4) {
827 for (i = nh >> 1; i > (k ^= i); i >>= 1);
890 for (i = nh >> 1; i > (j0 ^= i); i >>= 1);
894 for (k0 = 0; k0 < m; k0 += 4) {
896 for (j = j0; j < j0 + k0; j += 4) {
975 for (i = nh >> 1; i > (k ^= i); i >>= 1);
998 for (i = nh >> 1; i > (j0 ^= i); i >>= 1);
1003 __inline
void bitrv2conj(
int n,
float* a) {
1004 int j0, k0, j1, k1, l, m, i, j, k, nh;
1005 float xr, xi, yr, yi;
1008 for (l = n >> 2; l > 8; l >>= 2) {
1014 for (k0 = 0; k0 < m; k0 += 4) {
1016 for (j = j0; j < j0 + k0; j += 4) {
1175 for (i = nh >> 1; i > (k ^= i); i >>= 1);
1180 a[j1 - 1] = -a[j1 - 1];
1189 a[k1 + 3] = -a[k1 + 3];
1232 a[j1 - 1] = -a[j1 - 1];
1241 a[k1 + 3] = -a[k1 + 3];
1242 for (i = nh >> 1; i > (j0 ^= i); i >>= 1);
1246 for (k0 = 0; k0 < m; k0 += 4) {
1248 for (j = j0; j < j0 + k0; j += 4) {
1327 for (i = nh >> 1; i > (k ^= i); i >>= 1);
1332 a[j1 - 1] = -a[j1 - 1];
1341 a[k1 + 3] = -a[k1 + 3];
1344 a[j1 - 1] = -a[j1 - 1];
1353 a[k1 + 3] = -a[k1 + 3];
1354 for (i = nh >> 1; i > (j0 ^= i); i >>= 1);
1359 __inline
void bitrv216(
float* a) {
1360 float x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, x5r, x5i, x7r, x7i, x8r, x8i,
1361 x10r, x10i, x11r, x11i, x12r, x12i, x13r, x13i, x14r, x14i;
1413 __inline
void bitrv216neg(
float* a) {
1414 float x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, x5r, x5i, x6r, x6i, x7r, x7i,
1415 x8r, x8i, x9r, x9i, x10r, x10i, x11r, x11i, x12r, x12i, x13r, x13i,
1416 x14r, x14i, x15r, x15i;
1480 __inline
void bitrv208(
float* a) {
1481 float x1r, x1i, x3r, x3i, x4r, x4i, x6r, x6i;
1501 __inline
void bitrv208neg(
float* a) {
1502 float x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, x5r, x5i, x6r, x6i, x7r, x7i;
1534 __inline
void bitrv1(
int n,
float* a) {
1535 int j0, k0, j1, k1, l, m, i, j, k, nh;
1543 for (l = n >> 2; l > 2; l >>= 2) {
1553 for (k0 = 2; k0 < m; k0 += 2) {
1554 for (i = nh >> 1; i > (j0 ^= i); i >>= 1);
1556 for (j = j0; j < j0 + k0; j += 2) {
1595 for (i = nh >> 1; i > (k ^= i); i >>= 1);
1611 for (k0 = 2; k0 < m; k0 += 2) {
1612 for (i = nh >> 1; i > (j0 ^= i); i >>= 1);
1614 for (j = j0; j < j0 + k0; j += 2) {
1633 for (i = nh >> 1; i > (k ^= i); i >>= 1);
1645 __inline
void cftb1st(
int n,
float* a) {
1646 int i, i0, j, j0, j1, j2, j3, m, mh;
1647 float ew, w1r, w1i, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i, ss1,
1649 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1657 x0i = -a[1] - a[j2 + 1];
1659 x1i = -a[1] + a[j2 + 1];
1660 x2r = a[j1] + a[j3];
1661 x2i = a[j1 + 1] + a[j3 + 1];
1662 x3r = a[j1] - a[j3];
1663 x3i = a[j1 + 1] - a[j3 + 1];
1667 a[j1 + 1] = x0i + x2i;
1669 a[j2 + 1] = x1i + x3r;
1671 a[j3 + 1] = x1i - x3r;
1682 wk3i = 2 * ss1 * wk1r;
1683 wk3r = wk1r - wk3i * wk1i;
1684 wk3i = wk1i - wk3i * wk1r;
1688 i0 = i + 4 * CDFT_LOOP_DIV;
1692 for (j = i + 2; j < i0; j += 4) {
1701 x0i = -a[j + 1] - a[j2 + 1];
1703 x1i = -a[j + 1] + a[j2 + 1];
1704 x2r = a[j1] + a[j3];
1705 x2i = a[j1 + 1] + a[j3 + 1];
1706 x3r = a[j1] - a[j3];
1707 x3i = a[j1 + 1] - a[j3 + 1];
1709 a[j + 1] = x0i - x2i;
1711 a[j1 + 1] = x0i + x2i;
1714 a[j2] = wk1r * x0r - wk1i * x0i;
1715 a[j2 + 1] = wk1r * x0i + wk1i * x0r;
1718 a[j3] = wk3r * x0r + wk3i * x0i;
1719 a[j3 + 1] = wk3r * x0i - wk3i * x0r;
1720 x0r = a[j + 2] + a[j2 + 2];
1721 x0i = -a[j + 3] - a[j2 + 3];
1722 x1r = a[j + 2] - a[j2 + 2];
1723 x1i = -a[j + 3] + a[j2 + 3];
1724 x2r = a[j1 + 2] + a[j3 + 2];
1725 x2i = a[j1 + 3] + a[j3 + 3];
1726 x3r = a[j1 + 2] - a[j3 + 2];
1727 x3i = a[j1 + 3] - a[j3 + 3];
1728 a[j + 2] = x0r + x2r;
1729 a[j + 3] = x0i - x2i;
1730 a[j1 + 2] = x0r - x2r;
1731 a[j1 + 3] = x0i + x2i;
1734 a[j2 + 2] = wd1r * x0r - wd1i * x0i;
1735 a[j2 + 3] = wd1r * x0i + wd1i * x0r;
1738 a[j3 + 2] = wd3r * x0r + wd3i * x0i;
1739 a[j3 + 3] = wd3r * x0i - wd3i * x0r;
1744 x0r = a[j0] + a[j2];
1745 x0i = -a[j0 + 1] - a[j2 + 1];
1746 x1r = a[j0] - a[j2];
1747 x1i = -a[j0 + 1] + a[j2 + 1];
1748 x2r = a[j1] + a[j3];
1749 x2i = a[j1 + 1] + a[j3 + 1];
1750 x3r = a[j1] - a[j3];
1751 x3i = a[j1 + 1] - a[j3 + 1];
1753 a[j0 + 1] = x0i - x2i;
1755 a[j1 + 1] = x0i + x2i;
1758 a[j2] = wk1i * x0r - wk1r * x0i;
1759 a[j2 + 1] = wk1i * x0i + wk1r * x0r;
1762 a[j3] = wk3i * x0r + wk3r * x0i;
1763 a[j3 + 1] = wk3i * x0i - wk3r * x0r;
1764 x0r = a[j0 - 2] + a[j2 - 2];
1765 x0i = -a[j0 - 1] - a[j2 - 1];
1766 x1r = a[j0 - 2] - a[j2 - 2];
1767 x1i = -a[j0 - 1] + a[j2 - 1];
1768 x2r = a[j1 - 2] + a[j3 - 2];
1769 x2i = a[j1 - 1] + a[j3 - 1];
1770 x3r = a[j1 - 2] - a[j3 - 2];
1771 x3i = a[j1 - 1] - a[j3 - 1];
1772 a[j0 - 2] = x0r + x2r;
1773 a[j0 - 1] = x0i - x2i;
1774 a[j1 - 2] = x0r - x2r;
1775 a[j1 - 1] = x0i + x2i;
1778 a[j2 - 2] = wd1i * x0r - wd1r * x0i;
1779 a[j2 - 1] = wd1i * x0i + wd1r * x0r;
1782 a[j3 - 2] = wd3i * x0r + wd3r * x0i;
1783 a[j3 - 1] = wd3i * x0i - wd3r * x0r;
1792 wd1r = cos(ew * i0);
1793 wd1i = sin(ew * i0);
1794 wd3i = 4 * wd1i * wd1r;
1795 wd3r = wd1r - wd3i * wd1i;
1796 wd3i = wd1i - wd3i * wd1r;
1797 wk1r = w1r * wd1r - w1i * wd1i;
1798 wk1i = w1r * wd1i + w1i * wd1r;
1799 wk3i = 4 * wk1i * wk1r;
1800 wk3r = wk1r - wk3i * wk1i;
1801 wk3i = wk1i - wk3i * wk1r;
1809 x0r = a[j0 - 2] + a[j2 - 2];
1810 x0i = -a[j0 - 1] - a[j2 - 1];
1811 x1r = a[j0 - 2] - a[j2 - 2];
1812 x1i = -a[j0 - 1] + a[j2 - 1];
1813 x2r = a[j1 - 2] + a[j3 - 2];
1814 x2i = a[j1 - 1] + a[j3 - 1];
1815 x3r = a[j1 - 2] - a[j3 - 2];
1816 x3i = a[j1 - 1] - a[j3 - 1];
1817 a[j0 - 2] = x0r + x2r;
1818 a[j0 - 1] = x0i - x2i;
1819 a[j1 - 2] = x0r - x2r;
1820 a[j1 - 1] = x0i + x2i;
1823 a[j2 - 2] = wk1r * x0r - wk1i * x0i;
1824 a[j2 - 1] = wk1r * x0i + wk1i * x0r;
1827 a[j3 - 2] = wk3r * x0r + wk3i * x0i;
1828 a[j3 - 1] = wk3r * x0i - wk3i * x0r;
1829 x0r = a[j0] + a[j2];
1830 x0i = -a[j0 + 1] - a[j2 + 1];
1831 x1r = a[j0] - a[j2];
1832 x1i = -a[j0 + 1] + a[j2 + 1];
1833 x2r = a[j1] + a[j3];
1834 x2i = a[j1 + 1] + a[j3 + 1];
1835 x3r = a[j1] - a[j3];
1836 x3i = a[j1 + 1] - a[j3 + 1];
1838 a[j0 + 1] = x0i - x2i;
1840 a[j1 + 1] = x0i + x2i;
1843 a[j2] = wd1r * (x0r - x0i);
1844 a[j2 + 1] = wd1r * (x0i + x0r);
1847 a[j3] = -wd1r * (x0r + x0i);
1848 a[j3 + 1] = -wd1r * (x0i - x0r);
1849 x0r = a[j0 + 2] + a[j2 + 2];
1850 x0i = -a[j0 + 3] - a[j2 + 3];
1851 x1r = a[j0 + 2] - a[j2 + 2];
1852 x1i = -a[j0 + 3] + a[j2 + 3];
1853 x2r = a[j1 + 2] + a[j3 + 2];
1854 x2i = a[j1 + 3] + a[j3 + 3];
1855 x3r = a[j1 + 2] - a[j3 + 2];
1856 x3i = a[j1 + 3] - a[j3 + 3];
1857 a[j0 + 2] = x0r + x2r;
1858 a[j0 + 3] = x0i - x2i;
1859 a[j1 + 2] = x0r - x2r;
1860 a[j1 + 3] = x0i + x2i;
1863 a[j2 + 2] = wk1i * x0r - wk1r * x0i;
1864 a[j2 + 3] = wk1i * x0i + wk1r * x0r;
1867 a[j3 + 2] = wk3i * x0r + wk3r * x0i;
1868 a[j3 + 3] = wk3i * x0i - wk3r * x0r;
1871#ifdef USE_CDFT_THREADS
1872 struct cdft_arg_st {
1877 typedef struct cdft_arg_st cdft_arg_t;
1879 __inline
void cftrec4_th(
int n,
float* a) {
1880 void* cftrec1_th(
void* p);
1881 void* cftrec2_th(
void* p);
1882 int i, idiv4, m, nthread;
1883 cdft_thread_t th[4];
1889 if (n > CDFT_4THREADS_BEGIN_N) {
1894 for (i = 0; i < nthread; i++) {
1897 ag[i].a = &a[i * m];
1899 cdft_thread_create(&th[i], cftrec1_th, &ag[i]);
1901 cdft_thread_create(&th[i], cftrec2_th, &ag[i]);
1904 for (i = 0; i < nthread; i++) {
1905 cdft_thread_wait(th[i]);
1909 __inline
void* cftrec1_th(
void* p) {
1910 int cfttree(
int n,
int j,
int k,
float* a);
1911 void cftleaf(
int n,
int isplt,
float* a);
1912 void cftmdl1(
int n,
float* a);
1913 int isplt, j, k, m, n, n0;
1916 n0 = ((cdft_arg_t*)p)->n0;
1917 n = ((cdft_arg_t*)p)->n;
1918 a = ((cdft_arg_t*)p)->a;
1922 cftmdl1(m, &a[n - m]);
1924 cftleaf(m, 1, &a[n - m]);
1926 for (j = n - m; j > 0; j -= m) {
1928 isplt = cfttree(m, j, k, a);
1929 cftleaf(m, isplt, &a[j - m]);
1934 __inline
void* cftrec2_th(
void* p) {
1935 int cfttree(
int n,
int j,
int k,
float* a);
1936 void cftleaf(
int n,
int isplt,
float* a);
1937 void cftmdl2(
int n,
float* a);
1938 int isplt, j, k, m, n, n0;
1941 n0 = ((cdft_arg_t*)p)->n0;
1942 n = ((cdft_arg_t*)p)->n;
1943 a = ((cdft_arg_t*)p)->a;
1949 cftmdl2(m, &a[n - m]);
1951 cftleaf(m, 0, &a[n - m]);
1953 for (j = n - m; j > 0; j -= m) {
1955 isplt = cfttree(m, j, k, a);
1956 cftleaf(m, isplt, &a[j - m]);
1962 __inline
void cftrec4(
int n,
float* a) {
1971 cftmdl1(m, &a[n - m]);
1973 cftleaf(m, 1, &a[n - m]);
1975 for (j = n - m; j > 0; j -= m) {
1977 isplt = cfttree(m, j, k, a);
1978 cftleaf(m, isplt, &a[j - m]);
1982 __inline
int cfttree(
int n,
int j,
int k,
float* a) {
1990 cftmdl1(n, &a[j - n]);
1992 cftmdl2(n, &a[j - n]);
1996 for (i = k; (i & 3) == 0; i >>= 2) {
2002 cftmdl1(m, &a[j - m]);
2007 cftmdl2(m, &a[j - m]);
2015 __inline
void cftleaf(
int n,
int isplt,
float* a) {
2029 cftmdl2(128, &a[128]);
2034 cftmdl1(128, &a[256]);
2040 cftmdl1(128, &a[384]);
2043 cftmdl2(128, &a[384]);
2055 cftmdl2(64, &a[64]);
2060 cftmdl1(64, &a[128]);
2066 cftmdl1(64, &a[192]);
2069 cftmdl2(64, &a[192]);
2078 __inline
void cftmdl1(
int n,
float* a) {
2079 int i, i0, j, j0, j1, j2, j3, m, mh;
2080 float ew, w1r, w1i, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i, ss1,
2082 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
2090 x0i = a[1] + a[j2 + 1];
2092 x1i = a[1] - a[j2 + 1];
2093 x2r = a[j1] + a[j3];
2094 x2i = a[j1 + 1] + a[j3 + 1];
2095 x3r = a[j1] - a[j3];
2096 x3i = a[j1 + 1] - a[j3 + 1];
2100 a[j1 + 1] = x0i - x2i;
2102 a[j2 + 1] = x1i + x3r;
2104 a[j3 + 1] = x1i - x3r;
2115 wk3i = 2 * ss1 * wk1r;
2116 wk3r = wk1r - wk3i * wk1i;
2117 wk3i = wk1i - wk3i * wk1r;
2121 i0 = i + 4 * CDFT_LOOP_DIV;
2125 for (j = i + 2; j < i0; j += 4) {
2134 x0i = a[j + 1] + a[j2 + 1];
2136 x1i = a[j + 1] - a[j2 + 1];
2137 x2r = a[j1] + a[j3];
2138 x2i = a[j1 + 1] + a[j3 + 1];
2139 x3r = a[j1] - a[j3];
2140 x3i = a[j1 + 1] - a[j3 + 1];
2142 a[j + 1] = x0i + x2i;
2144 a[j1 + 1] = x0i - x2i;
2147 a[j2] = wk1r * x0r - wk1i * x0i;
2148 a[j2 + 1] = wk1r * x0i + wk1i * x0r;
2151 a[j3] = wk3r * x0r + wk3i * x0i;
2152 a[j3 + 1] = wk3r * x0i - wk3i * x0r;
2153 x0r = a[j + 2] + a[j2 + 2];
2154 x0i = a[j + 3] + a[j2 + 3];
2155 x1r = a[j + 2] - a[j2 + 2];
2156 x1i = a[j + 3] - a[j2 + 3];
2157 x2r = a[j1 + 2] + a[j3 + 2];
2158 x2i = a[j1 + 3] + a[j3 + 3];
2159 x3r = a[j1 + 2] - a[j3 + 2];
2160 x3i = a[j1 + 3] - a[j3 + 3];
2161 a[j + 2] = x0r + x2r;
2162 a[j + 3] = x0i + x2i;
2163 a[j1 + 2] = x0r - x2r;
2164 a[j1 + 3] = x0i - x2i;
2167 a[j2 + 2] = wd1r * x0r - wd1i * x0i;
2168 a[j2 + 3] = wd1r * x0i + wd1i * x0r;
2171 a[j3 + 2] = wd3r * x0r + wd3i * x0i;
2172 a[j3 + 3] = wd3r * x0i - wd3i * x0r;
2177 x0r = a[j0] + a[j2];
2178 x0i = a[j0 + 1] + a[j2 + 1];
2179 x1r = a[j0] - a[j2];
2180 x1i = a[j0 + 1] - a[j2 + 1];
2181 x2r = a[j1] + a[j3];
2182 x2i = a[j1 + 1] + a[j3 + 1];
2183 x3r = a[j1] - a[j3];
2184 x3i = a[j1 + 1] - a[j3 + 1];
2186 a[j0 + 1] = x0i + x2i;
2188 a[j1 + 1] = x0i - x2i;
2191 a[j2] = wk1i * x0r - wk1r * x0i;
2192 a[j2 + 1] = wk1i * x0i + wk1r * x0r;
2195 a[j3] = wk3i * x0r + wk3r * x0i;
2196 a[j3 + 1] = wk3i * x0i - wk3r * x0r;
2197 x0r = a[j0 - 2] + a[j2 - 2];
2198 x0i = a[j0 - 1] + a[j2 - 1];
2199 x1r = a[j0 - 2] - a[j2 - 2];
2200 x1i = a[j0 - 1] - a[j2 - 1];
2201 x2r = a[j1 - 2] + a[j3 - 2];
2202 x2i = a[j1 - 1] + a[j3 - 1];
2203 x3r = a[j1 - 2] - a[j3 - 2];
2204 x3i = a[j1 - 1] - a[j3 - 1];
2205 a[j0 - 2] = x0r + x2r;
2206 a[j0 - 1] = x0i + x2i;
2207 a[j1 - 2] = x0r - x2r;
2208 a[j1 - 1] = x0i - x2i;
2211 a[j2 - 2] = wd1i * x0r - wd1r * x0i;
2212 a[j2 - 1] = wd1i * x0i + wd1r * x0r;
2215 a[j3 - 2] = wd3i * x0r + wd3r * x0i;
2216 a[j3 - 1] = wd3i * x0i - wd3r * x0r;
2225 wd1r = cos(ew * i0);
2226 wd1i = sin(ew * i0);
2227 wd3i = 4 * wd1i * wd1r;
2228 wd3r = wd1r - wd3i * wd1i;
2229 wd3i = wd1i - wd3i * wd1r;
2230 wk1r = w1r * wd1r - w1i * wd1i;
2231 wk1i = w1r * wd1i + w1i * wd1r;
2232 wk3i = 4 * wk1i * wk1r;
2233 wk3r = wk1r - wk3i * wk1i;
2234 wk3i = wk1i - wk3i * wk1r;
2242 x0r = a[j0 - 2] + a[j2 - 2];
2243 x0i = a[j0 - 1] + a[j2 - 1];
2244 x1r = a[j0 - 2] - a[j2 - 2];
2245 x1i = a[j0 - 1] - a[j2 - 1];
2246 x2r = a[j1 - 2] + a[j3 - 2];
2247 x2i = a[j1 - 1] + a[j3 - 1];
2248 x3r = a[j1 - 2] - a[j3 - 2];
2249 x3i = a[j1 - 1] - a[j3 - 1];
2250 a[j0 - 2] = x0r + x2r;
2251 a[j0 - 1] = x0i + x2i;
2252 a[j1 - 2] = x0r - x2r;
2253 a[j1 - 1] = x0i - x2i;
2256 a[j2 - 2] = wk1r * x0r - wk1i * x0i;
2257 a[j2 - 1] = wk1r * x0i + wk1i * x0r;
2260 a[j3 - 2] = wk3r * x0r + wk3i * x0i;
2261 a[j3 - 1] = wk3r * x0i - wk3i * x0r;
2262 x0r = a[j0] + a[j2];
2263 x0i = a[j0 + 1] + a[j2 + 1];
2264 x1r = a[j0] - a[j2];
2265 x1i = a[j0 + 1] - a[j2 + 1];
2266 x2r = a[j1] + a[j3];
2267 x2i = a[j1 + 1] + a[j3 + 1];
2268 x3r = a[j1] - a[j3];
2269 x3i = a[j1 + 1] - a[j3 + 1];
2271 a[j0 + 1] = x0i + x2i;
2273 a[j1 + 1] = x0i - x2i;
2276 a[j2] = wd1r * (x0r - x0i);
2277 a[j2 + 1] = wd1r * (x0i + x0r);
2280 a[j3] = -wd1r * (x0r + x0i);
2281 a[j3 + 1] = -wd1r * (x0i - x0r);
2282 x0r = a[j0 + 2] + a[j2 + 2];
2283 x0i = a[j0 + 3] + a[j2 + 3];
2284 x1r = a[j0 + 2] - a[j2 + 2];
2285 x1i = a[j0 + 3] - a[j2 + 3];
2286 x2r = a[j1 + 2] + a[j3 + 2];
2287 x2i = a[j1 + 3] + a[j3 + 3];
2288 x3r = a[j1 + 2] - a[j3 + 2];
2289 x3i = a[j1 + 3] - a[j3 + 3];
2290 a[j0 + 2] = x0r + x2r;
2291 a[j0 + 3] = x0i + x2i;
2292 a[j1 + 2] = x0r - x2r;
2293 a[j1 + 3] = x0i - x2i;
2296 a[j2 + 2] = wk1i * x0r - wk1r * x0i;
2297 a[j2 + 3] = wk1i * x0i + wk1r * x0r;
2300 a[j3 + 2] = wk3i * x0r + wk3r * x0i;
2301 a[j3 + 3] = wk3i * x0i - wk3r * x0r;
2304 __inline
void cftmdl2(
int n,
float* a) {
2305 int i, i0, j, j0, j1, j2, j3, m, mh;
2306 float ew, w1r, w1i, wn4r, wk1r, wk1i, wk3r, wk3i, wl1r, wl1i, wl3r, wl3i,
2307 wd1r, wd1i, wd3r, wd3i, we1r, we1i, we3r, we3i, ss1, ss3;
2308 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y2r, y2i;
2316 x0r = a[0] - a[j2 + 1];
2318 x1r = a[0] + a[j2 + 1];
2320 x2r = a[j1] - a[j3 + 1];
2321 x2i = a[j1 + 1] + a[j3];
2322 x3r = a[j1] + a[j3 + 1];
2323 x3i = a[j1 + 1] - a[j3];
2324 y0r = wn4r * (x2r - x2i);
2325 y0i = wn4r * (x2i + x2r);
2329 a[j1 + 1] = x0i - y0i;
2330 y0r = wn4r * (x3r - x3i);
2331 y0i = wn4r * (x3i + x3r);
2333 a[j2 + 1] = x1i + y0r;
2335 a[j3 + 1] = x1i - y0r;
2344 ew = M_PI_2 / (2 * m);
2349 wd1r = wn4r * (w1r - w1i);
2350 wd1i = wn4r * (w1i + w1r);
2352 wk3i = 2 * ss1 * wk1r;
2353 wk3r = wk1r - wk3i * wk1i;
2354 wk3i = wk1i - wk3i * wk1r;
2356 wd3r = -wn4r * (wk3r - wk3i);
2357 wd3i = -wn4r * (wk3i + wk3r);
2360 i0 = i + 4 * CDFT_LOOP_DIV;
2364 for (j = i + 2; j < i0; j += 4) {
2376 x0r = a[j] - a[j2 + 1];
2377 x0i = a[j + 1] + a[j2];
2378 x1r = a[j] + a[j2 + 1];
2379 x1i = a[j + 1] - a[j2];
2380 x2r = a[j1] - a[j3 + 1];
2381 x2i = a[j1 + 1] + a[j3];
2382 x3r = a[j1] + a[j3 + 1];
2383 x3i = a[j1 + 1] - a[j3];
2384 y0r = wk1r * x0r - wk1i * x0i;
2385 y0i = wk1r * x0i + wk1i * x0r;
2386 y2r = wd1r * x2r - wd1i * x2i;
2387 y2i = wd1r * x2i + wd1i * x2r;
2389 a[j + 1] = y0i + y2i;
2391 a[j1 + 1] = y0i - y2i;
2392 y0r = wk3r * x1r + wk3i * x1i;
2393 y0i = wk3r * x1i - wk3i * x1r;
2394 y2r = wd3r * x3r + wd3i * x3i;
2395 y2i = wd3r * x3i - wd3i * x3r;
2397 a[j2 + 1] = y0i + y2i;
2399 a[j3 + 1] = y0i - y2i;
2400 x0r = a[j + 2] - a[j2 + 3];
2401 x0i = a[j + 3] + a[j2 + 2];
2402 x1r = a[j + 2] + a[j2 + 3];
2403 x1i = a[j + 3] - a[j2 + 2];
2404 x2r = a[j1 + 2] - a[j3 + 3];
2405 x2i = a[j1 + 3] + a[j3 + 2];
2406 x3r = a[j1 + 2] + a[j3 + 3];
2407 x3i = a[j1 + 3] - a[j3 + 2];
2408 y0r = wl1r * x0r - wl1i * x0i;
2409 y0i = wl1r * x0i + wl1i * x0r;
2410 y2r = we1r * x2r - we1i * x2i;
2411 y2i = we1r * x2i + we1i * x2r;
2412 a[j + 2] = y0r + y2r;
2413 a[j + 3] = y0i + y2i;
2414 a[j1 + 2] = y0r - y2r;
2415 a[j1 + 3] = y0i - y2i;
2416 y0r = wl3r * x1r + wl3i * x1i;
2417 y0i = wl3r * x1i - wl3i * x1r;
2418 y2r = we3r * x3r + we3i * x3i;
2419 y2i = we3r * x3i - we3i * x3r;
2420 a[j2 + 2] = y0r + y2r;
2421 a[j2 + 3] = y0i + y2i;
2422 a[j3 + 2] = y0r - y2r;
2423 a[j3 + 3] = y0i - y2i;
2428 x0r = a[j0] - a[j2 + 1];
2429 x0i = a[j0 + 1] + a[j2];
2430 x1r = a[j0] + a[j2 + 1];
2431 x1i = a[j0 + 1] - a[j2];
2432 x2r = a[j1] - a[j3 + 1];
2433 x2i = a[j1 + 1] + a[j3];
2434 x3r = a[j1] + a[j3 + 1];
2435 x3i = a[j1 + 1] - a[j3];
2436 y0r = wd1i * x0r - wd1r * x0i;
2437 y0i = wd1i * x0i + wd1r * x0r;
2438 y2r = wk1i * x2r - wk1r * x2i;
2439 y2i = wk1i * x2i + wk1r * x2r;
2441 a[j0 + 1] = y0i + y2i;
2443 a[j1 + 1] = y0i - y2i;
2444 y0r = wd3i * x1r + wd3r * x1i;
2445 y0i = wd3i * x1i - wd3r * x1r;
2446 y2r = wk3i * x3r + wk3r * x3i;
2447 y2i = wk3i * x3i - wk3r * x3r;
2449 a[j2 + 1] = y0i + y2i;
2451 a[j3 + 1] = y0i - y2i;
2452 x0r = a[j0 - 2] - a[j2 - 1];
2453 x0i = a[j0 - 1] + a[j2 - 2];
2454 x1r = a[j0 - 2] + a[j2 - 1];
2455 x1i = a[j0 - 1] - a[j2 - 2];
2456 x2r = a[j1 - 2] - a[j3 - 1];
2457 x2i = a[j1 - 1] + a[j3 - 2];
2458 x3r = a[j1 - 2] + a[j3 - 1];
2459 x3i = a[j1 - 1] - a[j3 - 2];
2460 y0r = we1i * x0r - we1r * x0i;
2461 y0i = we1i * x0i + we1r * x0r;
2462 y2r = wl1i * x2r - wl1r * x2i;
2463 y2i = wl1i * x2i + wl1r * x2r;
2464 a[j0 - 2] = y0r + y2r;
2465 a[j0 - 1] = y0i + y2i;
2466 a[j1 - 2] = y0r - y2r;
2467 a[j1 - 1] = y0i - y2i;
2468 y0r = we3i * x1r + we3r * x1i;
2469 y0i = we3i * x1i - we3r * x1r;
2470 y2r = wl3i * x3r + wl3r * x3i;
2471 y2i = wl3i * x3i - wl3r * x3r;
2472 a[j2 - 2] = y0r + y2r;
2473 a[j2 - 1] = y0i + y2i;
2474 a[j3 - 2] = y0r - y2r;
2475 a[j3 - 1] = y0i - y2i;
2488 wl1r = cos(ew * i0);
2489 wl1i = sin(ew * i0);
2490 wl3i = 4 * wl1i * wl1r;
2491 wl3r = wl1r - wl3i * wl1i;
2492 wl3i = wl1i - wl3i * wl1r;
2493 we1r = wn4r * (wl1r - wl1i);
2494 we1i = wn4r * (wl1i + wl1r);
2495 we3r = -wn4r * (wl3r - wl3i);
2496 we3i = -wn4r * (wl3i + wl3r);
2497 wk1r = w1r * wl1r - w1i * wl1i;
2498 wk1i = w1r * wl1i + w1i * wl1r;
2499 wk3i = 4 * wk1i * wk1r;
2500 wk3r = wk1r - wk3i * wk1i;
2501 wk3i = wk1i - wk3i * wk1r;
2502 wd1r = wn4r * (wk1r - wk1i);
2503 wd1i = wn4r * (wk1i + wk1r);
2504 wd3r = -wn4r * (wk3r - wk3i);
2505 wd3i = -wn4r * (wk3i + wk3r);
2514 x0r = a[j0 - 2] - a[j2 - 1];
2515 x0i = a[j0 - 1] + a[j2 - 2];
2516 x1r = a[j0 - 2] + a[j2 - 1];
2517 x1i = a[j0 - 1] - a[j2 - 2];
2518 x2r = a[j1 - 2] - a[j3 - 1];
2519 x2i = a[j1 - 1] + a[j3 - 2];
2520 x3r = a[j1 - 2] + a[j3 - 1];
2521 x3i = a[j1 - 1] - a[j3 - 2];
2522 y0r = wk1r * x0r - wk1i * x0i;
2523 y0i = wk1r * x0i + wk1i * x0r;
2524 y2r = wd1r * x2r - wd1i * x2i;
2525 y2i = wd1r * x2i + wd1i * x2r;
2526 a[j0 - 2] = y0r + y2r;
2527 a[j0 - 1] = y0i + y2i;
2528 a[j1 - 2] = y0r - y2r;
2529 a[j1 - 1] = y0i - y2i;
2530 y0r = wk3r * x1r + wk3i * x1i;
2531 y0i = wk3r * x1i - wk3i * x1r;
2532 y2r = wd3r * x3r + wd3i * x3i;
2533 y2i = wd3r * x3i - wd3i * x3r;
2534 a[j2 - 2] = y0r + y2r;
2535 a[j2 - 1] = y0i + y2i;
2536 a[j3 - 2] = y0r - y2r;
2537 a[j3 - 1] = y0i - y2i;
2538 x0r = a[j0] - a[j2 + 1];
2539 x0i = a[j0 + 1] + a[j2];
2540 x1r = a[j0] + a[j2 + 1];
2541 x1i = a[j0 + 1] - a[j2];
2542 x2r = a[j1] - a[j3 + 1];
2543 x2i = a[j1 + 1] + a[j3];
2544 x3r = a[j1] + a[j3 + 1];
2545 x3i = a[j1 + 1] - a[j3];
2546 y0r = wl1r * x0r - wl1i * x0i;
2547 y0i = wl1r * x0i + wl1i * x0r;
2548 y2r = wl1i * x2r - wl1r * x2i;
2549 y2i = wl1i * x2i + wl1r * x2r;
2551 a[j0 + 1] = y0i + y2i;
2553 a[j1 + 1] = y0i - y2i;
2554 y0r = wl1i * x1r - wl1r * x1i;
2555 y0i = wl1i * x1i + wl1r * x1r;
2556 y2r = wl1r * x3r - wl1i * x3i;
2557 y2i = wl1r * x3i + wl1i * x3r;
2559 a[j2 + 1] = y0i - y2i;
2561 a[j3 + 1] = y0i + y2i;
2562 x0r = a[j0 + 2] - a[j2 + 3];
2563 x0i = a[j0 + 3] + a[j2 + 2];
2564 x1r = a[j0 + 2] + a[j2 + 3];
2565 x1i = a[j0 + 3] - a[j2 + 2];
2566 x2r = a[j1 + 2] - a[j3 + 3];
2567 x2i = a[j1 + 3] + a[j3 + 2];
2568 x3r = a[j1 + 2] + a[j3 + 3];
2569 x3i = a[j1 + 3] - a[j3 + 2];
2570 y0r = wd1i * x0r - wd1r * x0i;
2571 y0i = wd1i * x0i + wd1r * x0r;
2572 y2r = wk1i * x2r - wk1r * x2i;
2573 y2i = wk1i * x2i + wk1r * x2r;
2574 a[j0 + 2] = y0r + y2r;
2575 a[j0 + 3] = y0i + y2i;
2576 a[j1 + 2] = y0r - y2r;
2577 a[j1 + 3] = y0i - y2i;
2578 y0r = wd3i * x1r + wd3r * x1i;
2579 y0i = wd3i * x1i - wd3r * x1r;
2580 y2r = wk3i * x3r + wk3r * x3i;
2581 y2i = wk3i * x3i - wk3r * x3r;
2582 a[j2 + 2] = y0r + y2r;
2583 a[j2 + 3] = y0i + y2i;
2584 a[j3 + 2] = y0r - y2r;
2585 a[j3 + 3] = y0i - y2i;
2588 __inline
void cftfx41(
int n,
float* a) {
2607 __inline
void cftf161(
float* a) {
2608 float wn4r, wk1r, wk1i, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i,
2609 y1r, y1i, y2r, y2i, y3r, y3i, y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i,
2610 y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i, y12r, y12i, y13r, y13i,
2611 y14r, y14i, y15r, y15i;
2636 x2r = a[10] + a[26];
2637 x2i = a[11] + a[27];
2638 x3r = a[10] - a[26];
2639 x3i = a[11] - a[27];
2646 y9r = wk1r * x0r - wk1i * x0i;
2647 y9i = wk1r * x0i + wk1i * x0r;
2650 y13r = wk1i * x0r - wk1r * x0i;
2651 y13i = wk1i * x0i + wk1r * x0r;
2656 x2r = a[12] + a[28];
2657 x2i = a[13] + a[29];
2658 x3r = a[12] - a[28];
2659 x3i = a[13] - a[29];
2666 y10r = wn4r * (x0r - x0i);
2667 y10i = wn4r * (x0i + x0r);
2670 y14r = wn4r * (x0r + x0i);
2671 y14i = wn4r * (x0i - x0r);
2676 x2r = a[14] + a[30];
2677 x2i = a[15] + a[31];
2678 x3r = a[14] - a[30];
2679 x3i = a[15] - a[31];
2686 y11r = wk1i * x0r - wk1r * x0i;
2687 y11i = wk1i * x0i + wk1r * x0r;
2690 y15r = wk1r * x0r - wk1i * x0i;
2691 y15i = wk1r * x0i + wk1i * x0r;
2726 x2r = wn4r * (x0r - x0i);
2727 x2i = wn4r * (x0i + x0r);
2730 x3r = wn4r * (x0r - x0i);
2731 x3i = wn4r * (x0i + x0r);
2762 __inline
void cftf162(
float* a) {
2763 float wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i, x0r, x0i, x1r, x1i, x2r,
2764 x2i, y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, y4r, y4i, y5r, y5i, y6r,
2765 y6i, y7r, y7i, y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i, y12r, y12i,
2766 y13r, y13i, y14r, y14i, y15r, y15i;
2779 x2r = wn4r * (x0r - x0i);
2780 x2i = wn4r * (x0i + x0r);
2789 x2r = wn4r * (x0r - x0i);
2790 x2i = wn4r * (x0i + x0r);
2797 x1r = wk1r * x0r - wk1i * x0i;
2798 x1i = wk1r * x0i + wk1i * x0r;
2799 x0r = a[10] - a[27];
2800 x0i = a[11] + a[26];
2801 x2r = wk3i * x0r - wk3r * x0i;
2802 x2i = wk3i * x0i + wk3r * x0r;
2809 x1r = wk3r * x0r - wk3i * x0i;
2810 x1i = wk3r * x0i + wk3i * x0r;
2811 x0r = a[10] + a[27];
2812 x0i = a[11] - a[26];
2813 x2r = wk1r * x0r + wk1i * x0i;
2814 x2i = wk1r * x0i - wk1i * x0r;
2821 x1r = wk2r * x0r - wk2i * x0i;
2822 x1i = wk2r * x0i + wk2i * x0r;
2823 x0r = a[12] - a[29];
2824 x0i = a[13] + a[28];
2825 x2r = wk2i * x0r - wk2r * x0i;
2826 x2i = wk2i * x0i + wk2r * x0r;
2833 x1r = wk2i * x0r - wk2r * x0i;
2834 x1i = wk2i * x0i + wk2r * x0r;
2835 x0r = a[12] + a[29];
2836 x0i = a[13] - a[28];
2837 x2r = wk2r * x0r - wk2i * x0i;
2838 x2i = wk2r * x0i + wk2i * x0r;
2845 x1r = wk3r * x0r - wk3i * x0i;
2846 x1i = wk3r * x0i + wk3i * x0r;
2847 x0r = a[14] - a[31];
2848 x0i = a[15] + a[30];
2849 x2r = wk1i * x0r - wk1r * x0i;
2850 x2i = wk1i * x0i + wk1r * x0r;
2857 x1r = wk1i * x0r + wk1r * x0i;
2858 x1i = wk1i * x0i - wk1r * x0r;
2859 x0r = a[14] + a[31];
2860 x0i = a[15] - a[30];
2861 x2r = wk3i * x0r - wk3r * x0i;
2862 x2i = wk3i * x0i + wk3r * x0r;
2887 x2r = wn4r * (x0r - x0i);
2888 x2i = wn4r * (x0i + x0r);
2897 x2r = wn4r * (x0r - x0i);
2898 x2i = wn4r * (x0i + x0r);
2923 x2r = wn4r * (x0r - x0i);
2924 x2i = wn4r * (x0i + x0r);
2933 x2r = wn4r * (x0r - x0i);
2934 x2i = wn4r * (x0i + x0r);
2941 __inline
void cftf081(
float* a) {
2942 float wn4r, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y1r, y1i, y2r,
2943 y2i, y3r, y3i, y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i;
2978 y5r = wn4r * (x0r - x0i);
2979 y5i = wn4r * (x0r + x0i);
2980 y7r = wn4r * (x2r - x2i);
2981 y7i = wn4r * (x2r + x2i);
3000 __inline
void cftf082(
float* a) {
3001 float wn4r, wk1r, wk1i, x0r, x0i, x1r, x1i, y0r, y0i, y1r, y1i, y2r, y2i,
3002 y3r, y3i, y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i;
3013 y2r = wn4r * (x0r - x0i);
3014 y2i = wn4r * (x0i + x0r);
3017 y3r = wn4r * (x0r - x0i);
3018 y3i = wn4r * (x0i + x0r);
3021 y4r = wk1r * x0r - wk1i * x0i;
3022 y4i = wk1r * x0i + wk1i * x0r;
3025 y5r = wk1i * x0r - wk1r * x0i;
3026 y5i = wk1i * x0i + wk1r * x0r;
3029 y6r = wk1i * x0r - wk1r * x0i;
3030 y6i = wk1i * x0i + wk1r * x0r;
3033 y7r = wk1r * x0r - wk1i * x0i;
3034 y7i = wk1r * x0i + wk1i * x0r;
3069 __inline
void cftf040(
float* a) {
3070 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
3090 __inline
void cftb040(
float* a) {
3091 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
3111 __inline
void cftx020(
float* a) {
3122 __inline
void rftfsub(
int n,
float* a) {
3124 float ec, w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi;
3126 ec = 2 * M_PI_2 / n;
3138 i0 = i - 4 * RDFT_LOOP_DIV;
3142 for (j = i - 4; j >= i0; j -= 4) {
3144 xr = a[j + 2] - a[k - 2];
3145 xi = a[j + 3] + a[k - 1];
3146 yr = wdr * xr - wdi * xi;
3147 yi = wdr * xi + wdi * xr;
3153 wki += ss * (0.5 - wdr);
3155 xi = a[j + 1] + a[k + 1];
3156 yr = wkr * xr - wki * xi;
3157 yi = wkr * xi + wki * xr;
3163 wdi += ss * (0.5 - wkr);
3168 wkr = 0.5 * sin(ec * i0);
3169 wki = 0.5 * cos(ec * i0);
3170 wdr = 0.5 - (wkr * w1r - wki * w1i);
3171 wdi = wkr * w1i + wki * w1r;
3175 xr = a[2] - a[n - 2];
3176 xi = a[3] + a[n - 1];
3177 yr = wdr * xr - wdi * xi;
3178 yi = wdr * xi + wdi * xr;
3185 __inline
void rftbsub(
int n,
float* a) {
3187 float ec, w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi;
3189 ec = 2 * M_PI_2 / n;
3201 i0 = i - 4 * RDFT_LOOP_DIV;
3205 for (j = i - 4; j >= i0; j -= 4) {
3207 xr = a[j + 2] - a[k - 2];
3208 xi = a[j + 3] + a[k - 1];
3209 yr = wdr * xr + wdi * xi;
3210 yi = wdr * xi - wdi * xr;
3216 wki += ss * (0.5 - wdr);
3218 xi = a[j + 1] + a[k + 1];
3219 yr = wkr * xr + wki * xi;
3220 yi = wkr * xi - wki * xr;
3226 wdi += ss * (0.5 - wkr);
3231 wkr = 0.5 * sin(ec * i0);
3232 wki = 0.5 * cos(ec * i0);
3233 wdr = 0.5 - (wkr * w1r - wki * w1i);
3234 wdi = wkr * w1i + wki * w1r;
3238 xr = a[2] - a[n - 2];
3239 xi = a[3] + a[n - 1];
3240 yr = wdr * xr + wdi * xi;
3241 yi = wdr * xi - wdi * xr;
3248 __inline
void dctsub(
int n,
float* a) {
3250 float ec, w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi;
3257 wdr = 0.5 * (w1r - w1i);
3258 wdi = 0.5 * (w1r + w1i);
3263 i0 = i + 2 * DCST_LOOP_DIV;
3267 for (j = i + 2; j <= i0; j += 2) {
3269 xr = wdi * a[j - 1] - wdr * a[k + 1];
3270 xi = wdr * a[j - 1] + wdi * a[k + 1];
3273 yr = wki * a[j] - wkr * a[k];
3274 yi = wkr * a[j] + wki * a[k];
3287 wkr = 0.5 * (wdr - wdi);
3288 wki = 0.5 * (wdr + wdi);
3289 wdr = wkr * w1r - wki * w1i;
3290 wdi = wkr * w1i + wki * w1r;
3293 xr = wdi * a[m - 1] - wdr * a[m + 1];
3294 a[m - 1] = wdr * a[m - 1] + wdi * a[m + 1];
3299 __inline
void dstsub(
int n,
float* a) {
3301 float ec, w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi;
3308 wdr = 0.5 * (w1r - w1i);
3309 wdi = 0.5 * (w1r + w1i);
3314 i0 = i + 2 * DCST_LOOP_DIV;
3318 for (j = i + 2; j <= i0; j += 2) {
3320 xr = wdi * a[k + 1] - wdr * a[j - 1];
3321 xi = wdr * a[k + 1] + wdi * a[j - 1];
3324 yr = wki * a[k] - wkr * a[j];
3325 yi = wkr * a[k] + wki * a[j];
3338 wkr = 0.5 * (wdr - wdi);
3339 wki = 0.5 * (wdr + wdi);
3340 wdr = wkr * w1r - wki * w1i;
3341 wdi = wkr * w1i + wki * w1r;
3344 xr = wdi * a[m + 1] - wdr * a[m - 1];
3345 a[m + 1] = wdr * a[m + 1] + wdi * a[m - 1];
3350 __inline
void dctsub4(
int n,
float* a) {
3352 float wki, wdr, wdi, xr;
3359 xr = wdi * a[1] - wdr * a[3];
3360 a[1] = wdr * a[1] + wdi * a[3];
3366 __inline
void dstsub4(
int n,
float* a) {
3368 float wki, wdr, wdi, xr;
3375 xr = wdi * a[3] - wdr * a[1];
3376 a[3] = wdr * a[3] + wdi * a[1];
3383#pragma clang diagnostic pop