beeping-core 2.0.0
C++20 library for encoding and decoding data over sound
Loading...
Searching...
No Matches
fftsg.h
1#ifndef __fftsg__
2#define __fftsg__
3
4/*
5Fast Fourier/Cosine/Sine Transform
6 dimension :one
7 data length :power of 2
8 decimation :frequency
9 radix :split-radix
10 data :inplace
11 table :not use
12functions
13 cdft: Complex Discrete Fourier Transform
14 rdft: Real Discrete Fourier Transform
15 ddct: Discrete Cosine Transform
16 ddst: Discrete Sine Transform
17 dfct: Cosine Transform of RDFT (Real Symmetric DFT)
18 dfst: Sine Transform of RDFT (Real Anti-symmetric DFT)
19function prototypes
20 void cdft(int, int, float *);
21 void rdft(int, int, float *);
22 void ddct(int, int, float *);
23 void ddst(int, int, float *);
24 void dfct(int, float *);
25 void dfst(int, float *);
26macro definitions
27 USE_CDFT_PTHREADS : default=not defined
28 CDFT_THREADS_BEGIN_N : must be >= 512, default=8192
29 CDFT_4THREADS_BEGIN_N : must be >= 512, default=65536
30 USE_CDFT_WINTHREADS : default=not defined
31 CDFT_THREADS_BEGIN_N : must be >= 512, default=32768
32 CDFT_4THREADS_BEGIN_N : must be >= 512, default=524288
33
34
35-------- Complex DFT (Discrete Fourier Transform) --------
36 [definition]
37 <case1>
38 X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k<n
39 <case2>
40 X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k<n
41 (notes: sum_j=0^n-1 is a summation from j=0 to n-1)
42 [usage]
43 <case1>
44 cdft(2*n, 1, a);
45 <case2>
46 cdft(2*n, -1, a);
47 [parameters]
48 2*n :data length (int)
49 n >= 1, n = power of 2
50 a[0...2*n-1] :input/output data (float *)
51 input data
52 a[2*j] = Re(x[j]),
53 a[2*j+1] = Im(x[j]), 0<=j<n
54 output data
55 a[2*k] = Re(X[k]),
56 a[2*k+1] = Im(X[k]), 0<=k<n
57 [remark]
58 Inverse of
59 cdft(2*n, -1, a);
60 is
61 cdft(2*n, 1, a);
62 for (j = 0; j <= 2 * n - 1; j++) {
63 a[j] *= 1.0 / n;
64 }
65 .
66
67
68-------- Real DFT / Inverse of Real DFT --------
69 [definition]
70 <case1> RDFT
71 R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2
72 I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2
73 <case2> IRDFT (excluding scale)
74 a[k] = (R[0] + R[n/2]*cos(pi*k))/2 +
75 sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) +
76 sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n
77 [usage]
78 <case1>
79 rdft(n, 1, a);
80 <case2>
81 rdft(n, -1, a);
82 [parameters]
83 n :data length (int)
84 n >= 2, n = power of 2
85 a[0...n-1] :input/output data (float *)
86 <case1>
87 output data
88 a[2*k] = R[k], 0<=k<n/2
89 a[2*k+1] = I[k], 0<k<n/2
90 a[1] = R[n/2]
91 <case2>
92 input data
93 a[2*j] = R[j], 0<=j<n/2
94 a[2*j+1] = I[j], 0<j<n/2
95 a[1] = R[n/2]
96 [remark]
97 Inverse of
98 rdft(n, 1, a);
99 is
100 rdft(n, -1, a);
101 for (j = 0; j <= n - 1; j++) {
102 a[j] *= 2.0 / n;
103 }
104 .
105
106
107-------- DCT (Discrete Cosine Transform) / Inverse of DCT --------
108 [definition]
109 <case1> IDCT (excluding scale)
110 C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k<n
111 <case2> DCT
112 C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k<n
113 [usage]
114 <case1>
115 ddct(n, 1, a);
116 <case2>
117 ddct(n, -1, a);
118 [parameters]
119 n :data length (int)
120 n >= 2, n = power of 2
121 a[0...n-1] :input/output data (float *)
122 output data
123 a[k] = C[k], 0<=k<n
124 [remark]
125 Inverse of
126 ddct(n, -1, a);
127 is
128 a[0] *= 0.5;
129 ddct(n, 1, a);
130 for (j = 0; j <= n - 1; j++) {
131 a[j] *= 2.0 / n;
132 }
133 .
134
135
136-------- DST (Discrete Sine Transform) / Inverse of DST --------
137 [definition]
138 <case1> IDST (excluding scale)
139 S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k<n
140 <case2> DST
141 S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0<k<=n
142 [usage]
143 <case1>
144 ddst(n, 1, a);
145 <case2>
146 ddst(n, -1, a);
147 [parameters]
148 n :data length (int)
149 n >= 2, n = power of 2
150 a[0...n-1] :input/output data (float *)
151 <case1>
152 input data
153 a[j] = A[j], 0<j<n
154 a[0] = A[n]
155 output data
156 a[k] = S[k], 0<=k<n
157 <case2>
158 output data
159 a[k] = S[k], 0<k<n
160 a[0] = S[n]
161 [remark]
162 Inverse of
163 ddst(n, -1, a);
164 is
165 a[0] *= 0.5;
166 ddst(n, 1, a);
167 for (j = 0; j <= n - 1; j++) {
168 a[j] *= 2.0 / n;
169 }
170 .
171
172
173-------- Cosine Transform of RDFT (Real Symmetric DFT) --------
174 [definition]
175 C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n
176 [usage]
177 dfct(n, a);
178 [parameters]
179 n :data length - 1 (int)
180 n >= 2, n = power of 2
181 a[0...n] :input/output data (float *)
182 output data
183 a[k] = C[k], 0<=k<=n
184 [remark]
185 Inverse of
186 a[0] *= 0.5;
187 a[n] *= 0.5;
188 dfct(n, a);
189 is
190 a[0] *= 0.5;
191 a[n] *= 0.5;
192 dfct(n, a);
193 for (j = 0; j <= n; j++) {
194 a[j] *= 2.0 / n;
195 }
196 .
197
198
199-------- Sine Transform of RDFT (Real Anti-symmetric DFT) --------
200 [definition]
201 S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0<k<n
202 [usage]
203 dfst(n, a);
204 [parameters]
205 n :data length + 1 (int)
206 n >= 2, n = power of 2
207 a[0...n-1] :input/output data (float *)
208 output data
209 a[k] = S[k], 0<k<n
210 (a[0] is used for work area)
211 [remark]
212 Inverse of
213 dfst(n, a);
214 is
215 dfst(n, a);
216 for (j = 1; j <= n - 1; j++) {
217 a[j] *= 2.0 / n;
218 }
219 .
220*/
221
222#include <cmath>
223
224#include "defines_ooura.h"
225
226#pragma clang diagnostic push
227#pragma clang diagnostic ignored "-Wunused-private-field"
228
229class CFFTOoura {
230 // constructor and destructor
231 public:
232 CFFTOoura() {};
233 ~CFFTOoura() {};
234 // member variables
235 private:
236 double* SinusTable[18];
237 double* CosinusTable[18];
238 int powertwo[18]; // mdb bugfix. was [14]
239 public:
240 __inline void cftbsub(int n, float* a) {
241 /*void bitrv2conj(int n, float *a);
242 void bitrv216neg(float *a);
243 void bitrv208neg(float *a);
244 void cftb1st(int n, float *a);
245 void cftrec4(int n, float *a);
246 void cftleaf(int n, int isplt, float *a);
247 void cftfx41(int n, float *a);
248 void cftf161(float *a);
249 void cftf081(float *a);
250 void cftb040(float *a);
251 void cftx020(float *a);*/
252#ifdef USE_CDFT_THREADS
253 void cftrec4_th(int n, float* a);
254#endif /* USE_CDFT_THREADS */
255
256 if (n > 8) {
257 if (n > 32) {
258 cftb1st(n, a);
259#ifdef USE_CDFT_THREADS
260 if (n > CDFT_THREADS_BEGIN_N) {
261 cftrec4_th(n, a);
262 } else
263#endif /* USE_CDFT_THREADS */
264 if (n > 512) {
265 cftrec4(n, a);
266 } else if (n > 128) {
267 cftleaf(n, 1, a);
268 } else {
269 cftfx41(n, a);
270 }
271 bitrv2conj(n, a);
272 } else if (n == 32) {
273 cftf161(a);
274 bitrv216neg(a);
275 } else {
276 cftf081(a);
277 bitrv208neg(a);
278 }
279 } else if (n == 8) {
280 cftb040(a);
281 } else if (n == 4) {
282 cftx020(a);
283 }
284 }
285
286 __inline void cdft(int n, int isgn, float* a) {
287 // void cftfsub(int n, float *a);
288 // void cftbsub(int n, float *a);
289
290 if (isgn >= 0) {
291 cftfsub(n, a);
292 } else {
293 cftbsub(n, a);
294 }
295 }
296
297 __inline void rdft(int n, int isgn, float* a) {
298 /*void cftfsub(int n, float *a);
299 void cftbsub(int n, float *a);
300 void rftfsub(int n, float *a);
301 void rftbsub(int n, float *a);*/
302 float xi;
303
304 if (isgn >= 0) {
305 if (n > 4) {
306 cftfsub(n, a);
307 rftfsub(n, a);
308 } else if (n == 4) {
309 cftfsub(n, a);
310 }
311 xi = a[0] - a[1];
312 a[0] += a[1];
313 a[1] = xi;
314 } else {
315 a[1] = 0.5 * (a[0] - a[1]);
316 a[0] -= a[1];
317 if (n > 4) {
318 rftbsub(n, a);
319 cftbsub(n, a);
320 } else if (n == 4) {
321 cftbsub(n, a);
322 }
323 }
324 }
325
326 __inline void ddct(int n, int isgn, float* a) {
327 /*void cftfsub(int n, float *a);
328 void cftbsub(int n, float *a);
329 void rftfsub(int n, float *a);
330 void rftbsub(int n, float *a);
331 void dctsub(int n, float *a);
332 void dctsub4(int n, float *a);*/
333 int j;
334 float xr;
335
336 if (isgn < 0) {
337 xr = a[n - 1];
338 for (j = n - 2; j >= 2; j -= 2) {
339 a[j + 1] = a[j] - a[j - 1];
340 a[j] += a[j - 1];
341 }
342 a[1] = a[0] - xr;
343 a[0] += xr;
344 if (n > 4) {
345 rftbsub(n, a);
346 cftbsub(n, a);
347 } else if (n == 4) {
348 cftbsub(n, a);
349 }
350 }
351 if (n > 4) {
352 dctsub(n, a);
353 } else {
354 dctsub4(n, a);
355 }
356 if (isgn >= 0) {
357 if (n > 4) {
358 cftfsub(n, a);
359 rftfsub(n, a);
360 } else if (n == 4) {
361 cftfsub(n, a);
362 }
363 xr = a[0] - a[1];
364 a[0] += a[1];
365 for (j = 2; j < n; j += 2) {
366 a[j - 1] = a[j] - a[j + 1];
367 a[j] += a[j + 1];
368 }
369 a[n - 1] = xr;
370 }
371 }
372
373 __inline void ddst(int n, int isgn, float* a) {
374 /*void cftfsub(int n, float *a);
375 void cftbsub(int n, float *a);
376 void rftfsub(int n, float *a);
377 void rftbsub(int n, float *a);
378 void dstsub(int n, float *a);
379 void dstsub4(int n, float *a);*/
380 int j;
381 float xr;
382
383 if (isgn < 0) {
384 xr = a[n - 1];
385 for (j = n - 2; j >= 2; j -= 2) {
386 a[j + 1] = -a[j] - a[j - 1];
387 a[j] -= a[j - 1];
388 }
389 a[1] = a[0] + xr;
390 a[0] -= xr;
391 if (n > 4) {
392 rftbsub(n, a);
393 cftbsub(n, a);
394 } else if (n == 4) {
395 cftbsub(n, a);
396 }
397 }
398 if (n > 4) {
399 dstsub(n, a);
400 } else {
401 dstsub4(n, a);
402 }
403 if (isgn >= 0) {
404 if (n > 4) {
405 cftfsub(n, a);
406 rftfsub(n, a);
407 } else if (n == 4) {
408 cftfsub(n, a);
409 }
410 xr = a[0] - a[1];
411 a[0] += a[1];
412 for (j = 2; j < n; j += 2) {
413 a[j - 1] = -a[j] - a[j + 1];
414 a[j] -= a[j + 1];
415 }
416 a[n - 1] = -xr;
417 }
418 }
419
420 __inline void dfct(int n, float* a) {
421 /*void ddct(int n, int isgn, float *a);
422 void bitrv1(int n, float *a);*/
423 int j, k, m, mh;
424 float xr, xi, yr, yi, an;
425
426 m = n >> 1;
427 for (j = 0; j < m; j++) {
428 k = n - j;
429 xr = a[j] + a[k];
430 a[j] -= a[k];
431 a[k] = xr;
432 }
433 an = a[n];
434 while (m >= 2) {
435 ddct(m, 1, a);
436 if (m > 2) {
437 bitrv1(m, a);
438 }
439 mh = m >> 1;
440 xi = a[m];
441 a[m] = a[0];
442 a[0] = an - xi;
443 an += xi;
444 for (j = 1; j < mh; j++) {
445 k = m - j;
446 xr = a[m + k];
447 xi = a[m + j];
448 yr = a[j];
449 yi = a[k];
450 a[m + j] = yr;
451 a[m + k] = yi;
452 a[j] = xr - xi;
453 a[k] = xr + xi;
454 }
455 xr = a[mh];
456 a[mh] = a[m + mh];
457 a[m + mh] = xr;
458 m = mh;
459 }
460 xi = a[1];
461 a[1] = a[0];
462 a[0] = an + xi;
463 a[n] = an - xi;
464 if (n > 2) {
465 bitrv1(n, a);
466 }
467 }
468
469 __inline void dfst(int n, float* a) {
470 /*void ddst(int n, int isgn, float *a);
471 void bitrv1(int n, float *a);*/
472 int j, k, m, mh;
473 float xr, xi, yr, yi;
474
475 m = n >> 1;
476 for (j = 1; j < m; j++) {
477 k = n - j;
478 xr = a[j] - a[k];
479 a[j] += a[k];
480 a[k] = xr;
481 }
482 a[0] = a[m];
483 while (m >= 2) {
484 ddst(m, 1, a);
485 if (m > 2) {
486 bitrv1(m, a);
487 }
488 mh = m >> 1;
489 for (j = 1; j < mh; j++) {
490 k = m - j;
491 xr = a[m + k];
492 xi = a[m + j];
493 yr = a[j];
494 yi = a[k];
495 a[m + j] = yr;
496 a[m + k] = yi;
497 a[j] = xr + xi;
498 a[k] = xr - xi;
499 }
500 a[m] = a[0];
501 a[0] = a[m + mh];
502 a[m + mh] = a[mh];
503 m = mh;
504 }
505 a[1] = a[0];
506 a[0] = 0;
507 if (n > 2) {
508 bitrv1(n, a);
509 }
510 }
511
512 /* -------- child routines -------- */
513
514#include <math.h>
515#ifndef M_PI_2
516#define M_PI_2 1.570796326794896619231321691639751442098584699687
517#endif
518#ifndef WR5000 /* cos(M_PI_2*0.5000) */
519#define WR5000 0.707106781186547524400844362104849039284835937688
520#endif
521#ifndef WR2500 /* cos(M_PI_2*0.2500) */
522#define WR2500 0.923879532511286756128183189396788286822416625863
523#endif
524#ifndef WI2500 /* sin(M_PI_2*0.2500) */
525#define WI2500 0.382683432365089771728459984030398866761344562485
526#endif
527#ifndef WR1250 /* cos(M_PI_2*0.1250) */
528#define WR1250 0.980785280403230449126182236134239036973933730893
529#endif
530#ifndef WI1250 /* sin(M_PI_2*0.1250) */
531#define WI1250 0.195090322016128267848284868477022240927691617751
532#endif
533#ifndef WR3750 /* cos(M_PI_2*0.3750) */
534#define WR3750 0.831469612302545237078788377617905756738560811987
535#endif
536#ifndef WI3750 /* sin(M_PI_2*0.3750) */
537#define WI3750 0.555570233019602224742830813948532874374937190754
538#endif
539
540#ifdef USE_CDFT_PTHREADS
541#define USE_CDFT_THREADS
542#ifndef CDFT_THREADS_BEGIN_N
543#define CDFT_THREADS_BEGIN_N 8192
544#endif
545#ifndef CDFT_4THREADS_BEGIN_N
546#define CDFT_4THREADS_BEGIN_N 65536
547#endif
548#include <pthread.h>
549#include <stdio.h>
550#include <stdlib.h>
551#define cdft_thread_t pthread_t
552#define cdft_thread_create(thp, func, argp) \
553 { \
554 if (pthread_create(thp, NULL, func, (void*)argp) != 0) { \
555 fprintf(stderr, "cdft thread error\n"); \
556 exit(1); \
557 } \
558 }
559#define cdft_thread_wait(th) \
560 { \
561 if (pthread_join(th, NULL) != 0) { \
562 fprintf(stderr, "cdft thread error\n"); \
563 exit(1); \
564 } \
565 }
566#endif /* USE_CDFT_PTHREADS */
567
568#ifdef USE_CDFT_WINTHREADS
569#define USE_CDFT_THREADS
570#ifndef CDFT_THREADS_BEGIN_N
571#define CDFT_THREADS_BEGIN_N 32768
572#endif
573#ifndef CDFT_4THREADS_BEGIN_N
574#define CDFT_4THREADS_BEGIN_N 524288
575#endif
576#include <stdio.h>
577#include <stdlib.h>
578#include <windows.h>
579#define cdft_thread_t HANDLE
580#define cdft_thread_create(thp, func, argp) \
581 { \
582 DWORD thid; \
583 *(thp) = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)func, (LPVOID)argp, \
584 0, &thid); \
585 if (*(thp) == 0) { \
586 fprintf(stderr, "cdft thread error\n"); \
587 exit(1); \
588 } \
589 }
590#define cdft_thread_wait(th) \
591 { \
592 WaitForSingleObject(th, INFINITE); \
593 CloseHandle(th); \
594 }
595#endif /* USE_CDFT_WINTHREADS */
596
597#ifndef CDFT_LOOP_DIV /* control of the CDFT's speed & tolerance */
598#define CDFT_LOOP_DIV 32
599#endif
600
601#ifndef RDFT_LOOP_DIV /* control of the RDFT's speed & tolerance */
602#define RDFT_LOOP_DIV 64
603#endif
604
605#ifndef DCST_LOOP_DIV /* control of the DCT,DST's speed & tolerance */
606#define DCST_LOOP_DIV 64
607#endif
608
609 __inline void cftfsub(int n, float* a) {
610 /*void bitrv2(int n, float *a);
611 void bitrv216(float *a);
612 void bitrv208(float *a);
613 void cftmdl1(int n, float *a);
614 void cftrec4(int n, float *a);
615 void cftleaf(int n, int isplt, float *a);
616 void cftfx41(int n, float *a);
617 void cftf161(float *a);
618 void cftf081(float *a);
619 void cftf040(float *a);
620 void cftx020(float *a);*/
621#ifdef USE_CDFT_THREADS
622 void cftrec4_th(int n, float* a);
623#endif /* USE_CDFT_THREADS */
624
625 if (n > 8) {
626 if (n > 32) {
627 cftmdl1(n, a);
628#ifdef USE_CDFT_THREADS
629 if (n > CDFT_THREADS_BEGIN_N) {
630 cftrec4_th(n, a);
631 } else
632#endif /* USE_CDFT_THREADS */
633 if (n > 512) {
634 cftrec4(n, a);
635 } else if (n > 128) {
636 cftleaf(n, 1, a);
637 } else {
638 cftfx41(n, a);
639 }
640 bitrv2(n, a);
641 } else if (n == 32) {
642 cftf161(a);
643 bitrv216(a);
644 } else {
645 cftf081(a);
646 bitrv208(a);
647 }
648 } else if (n == 8) {
649 cftf040(a);
650 } else if (n == 4) {
651 cftx020(a);
652 }
653 }
654
655 __inline void bitrv2(int n, float* a) {
656 int j0, k0, j1, k1, l, m, i, j, k, nh;
657 float xr, xi, yr, yi;
658
659 m = 4;
660 for (l = n >> 2; l > 8; l >>= 2) {
661 m <<= 1;
662 }
663 nh = n >> 1;
664 if (l == 8) {
665 j0 = 0;
666 for (k0 = 0; k0 < m; k0 += 4) {
667 k = k0;
668 for (j = j0; j < j0 + k0; j += 4) {
669 xr = a[j];
670 xi = a[j + 1];
671 yr = a[k];
672 yi = a[k + 1];
673 a[j] = yr;
674 a[j + 1] = yi;
675 a[k] = xr;
676 a[k + 1] = xi;
677 j1 = j + m;
678 k1 = k + 2 * m;
679 xr = a[j1];
680 xi = a[j1 + 1];
681 yr = a[k1];
682 yi = a[k1 + 1];
683 a[j1] = yr;
684 a[j1 + 1] = yi;
685 a[k1] = xr;
686 a[k1 + 1] = xi;
687 j1 += m;
688 k1 -= m;
689 xr = a[j1];
690 xi = a[j1 + 1];
691 yr = a[k1];
692 yi = a[k1 + 1];
693 a[j1] = yr;
694 a[j1 + 1] = yi;
695 a[k1] = xr;
696 a[k1 + 1] = xi;
697 j1 += m;
698 k1 += 2 * m;
699 xr = a[j1];
700 xi = a[j1 + 1];
701 yr = a[k1];
702 yi = a[k1 + 1];
703 a[j1] = yr;
704 a[j1 + 1] = yi;
705 a[k1] = xr;
706 a[k1 + 1] = xi;
707 j1 += nh;
708 k1 += 2;
709 xr = a[j1];
710 xi = a[j1 + 1];
711 yr = a[k1];
712 yi = a[k1 + 1];
713 a[j1] = yr;
714 a[j1 + 1] = yi;
715 a[k1] = xr;
716 a[k1 + 1] = xi;
717 j1 -= m;
718 k1 -= 2 * m;
719 xr = a[j1];
720 xi = a[j1 + 1];
721 yr = a[k1];
722 yi = a[k1 + 1];
723 a[j1] = yr;
724 a[j1 + 1] = yi;
725 a[k1] = xr;
726 a[k1 + 1] = xi;
727 j1 -= m;
728 k1 += m;
729 xr = a[j1];
730 xi = a[j1 + 1];
731 yr = a[k1];
732 yi = a[k1 + 1];
733 a[j1] = yr;
734 a[j1 + 1] = yi;
735 a[k1] = xr;
736 a[k1 + 1] = xi;
737 j1 -= m;
738 k1 -= 2 * m;
739 xr = a[j1];
740 xi = a[j1 + 1];
741 yr = a[k1];
742 yi = a[k1 + 1];
743 a[j1] = yr;
744 a[j1 + 1] = yi;
745 a[k1] = xr;
746 a[k1 + 1] = xi;
747 j1 += 2;
748 k1 += nh;
749 xr = a[j1];
750 xi = a[j1 + 1];
751 yr = a[k1];
752 yi = a[k1 + 1];
753 a[j1] = yr;
754 a[j1 + 1] = yi;
755 a[k1] = xr;
756 a[k1 + 1] = xi;
757 j1 += m;
758 k1 += 2 * m;
759 xr = a[j1];
760 xi = a[j1 + 1];
761 yr = a[k1];
762 yi = a[k1 + 1];
763 a[j1] = yr;
764 a[j1 + 1] = yi;
765 a[k1] = xr;
766 a[k1 + 1] = xi;
767 j1 += m;
768 k1 -= m;
769 xr = a[j1];
770 xi = a[j1 + 1];
771 yr = a[k1];
772 yi = a[k1 + 1];
773 a[j1] = yr;
774 a[j1 + 1] = yi;
775 a[k1] = xr;
776 a[k1 + 1] = xi;
777 j1 += m;
778 k1 += 2 * m;
779 xr = a[j1];
780 xi = a[j1 + 1];
781 yr = a[k1];
782 yi = a[k1 + 1];
783 a[j1] = yr;
784 a[j1 + 1] = yi;
785 a[k1] = xr;
786 a[k1 + 1] = xi;
787 j1 -= nh;
788 k1 -= 2;
789 xr = a[j1];
790 xi = a[j1 + 1];
791 yr = a[k1];
792 yi = a[k1 + 1];
793 a[j1] = yr;
794 a[j1 + 1] = yi;
795 a[k1] = xr;
796 a[k1 + 1] = xi;
797 j1 -= m;
798 k1 -= 2 * m;
799 xr = a[j1];
800 xi = a[j1 + 1];
801 yr = a[k1];
802 yi = a[k1 + 1];
803 a[j1] = yr;
804 a[j1 + 1] = yi;
805 a[k1] = xr;
806 a[k1 + 1] = xi;
807 j1 -= m;
808 k1 += m;
809 xr = a[j1];
810 xi = a[j1 + 1];
811 yr = a[k1];
812 yi = a[k1 + 1];
813 a[j1] = yr;
814 a[j1 + 1] = yi;
815 a[k1] = xr;
816 a[k1 + 1] = xi;
817 j1 -= m;
818 k1 -= 2 * m;
819 xr = a[j1];
820 xi = a[j1 + 1];
821 yr = a[k1];
822 yi = a[k1 + 1];
823 a[j1] = yr;
824 a[j1 + 1] = yi;
825 a[k1] = xr;
826 a[k1 + 1] = xi;
827 for (i = nh >> 1; i > (k ^= i); i >>= 1);
828 }
829 k1 = j0 + k0;
830 j1 = k1 + 2;
831 k1 += nh;
832 xr = a[j1];
833 xi = a[j1 + 1];
834 yr = a[k1];
835 yi = a[k1 + 1];
836 a[j1] = yr;
837 a[j1 + 1] = yi;
838 a[k1] = xr;
839 a[k1 + 1] = xi;
840 j1 += m;
841 k1 += 2 * m;
842 xr = a[j1];
843 xi = a[j1 + 1];
844 yr = a[k1];
845 yi = a[k1 + 1];
846 a[j1] = yr;
847 a[j1 + 1] = yi;
848 a[k1] = xr;
849 a[k1 + 1] = xi;
850 j1 += m;
851 k1 -= m;
852 xr = a[j1];
853 xi = a[j1 + 1];
854 yr = a[k1];
855 yi = a[k1 + 1];
856 a[j1] = yr;
857 a[j1 + 1] = yi;
858 a[k1] = xr;
859 a[k1 + 1] = xi;
860 j1 -= 2;
861 k1 -= nh;
862 xr = a[j1];
863 xi = a[j1 + 1];
864 yr = a[k1];
865 yi = a[k1 + 1];
866 a[j1] = yr;
867 a[j1 + 1] = yi;
868 a[k1] = xr;
869 a[k1 + 1] = xi;
870 j1 += nh + 2;
871 k1 += nh + 2;
872 xr = a[j1];
873 xi = a[j1 + 1];
874 yr = a[k1];
875 yi = a[k1 + 1];
876 a[j1] = yr;
877 a[j1 + 1] = yi;
878 a[k1] = xr;
879 a[k1 + 1] = xi;
880 j1 -= nh - m;
881 k1 += 2 * m - 2;
882 xr = a[j1];
883 xi = a[j1 + 1];
884 yr = a[k1];
885 yi = a[k1 + 1];
886 a[j1] = yr;
887 a[j1 + 1] = yi;
888 a[k1] = xr;
889 a[k1 + 1] = xi;
890 for (i = nh >> 1; i > (j0 ^= i); i >>= 1);
891 }
892 } else {
893 j0 = 0;
894 for (k0 = 0; k0 < m; k0 += 4) {
895 k = k0;
896 for (j = j0; j < j0 + k0; j += 4) {
897 xr = a[j];
898 xi = a[j + 1];
899 yr = a[k];
900 yi = a[k + 1];
901 a[j] = yr;
902 a[j + 1] = yi;
903 a[k] = xr;
904 a[k + 1] = xi;
905 j1 = j + m;
906 k1 = k + m;
907 xr = a[j1];
908 xi = a[j1 + 1];
909 yr = a[k1];
910 yi = a[k1 + 1];
911 a[j1] = yr;
912 a[j1 + 1] = yi;
913 a[k1] = xr;
914 a[k1 + 1] = xi;
915 j1 += nh;
916 k1 += 2;
917 xr = a[j1];
918 xi = a[j1 + 1];
919 yr = a[k1];
920 yi = a[k1 + 1];
921 a[j1] = yr;
922 a[j1 + 1] = yi;
923 a[k1] = xr;
924 a[k1 + 1] = xi;
925 j1 -= m;
926 k1 -= m;
927 xr = a[j1];
928 xi = a[j1 + 1];
929 yr = a[k1];
930 yi = a[k1 + 1];
931 a[j1] = yr;
932 a[j1 + 1] = yi;
933 a[k1] = xr;
934 a[k1 + 1] = xi;
935 j1 += 2;
936 k1 += nh;
937 xr = a[j1];
938 xi = a[j1 + 1];
939 yr = a[k1];
940 yi = a[k1 + 1];
941 a[j1] = yr;
942 a[j1 + 1] = yi;
943 a[k1] = xr;
944 a[k1 + 1] = xi;
945 j1 += m;
946 k1 += m;
947 xr = a[j1];
948 xi = a[j1 + 1];
949 yr = a[k1];
950 yi = a[k1 + 1];
951 a[j1] = yr;
952 a[j1 + 1] = yi;
953 a[k1] = xr;
954 a[k1 + 1] = xi;
955 j1 -= nh;
956 k1 -= 2;
957 xr = a[j1];
958 xi = a[j1 + 1];
959 yr = a[k1];
960 yi = a[k1 + 1];
961 a[j1] = yr;
962 a[j1 + 1] = yi;
963 a[k1] = xr;
964 a[k1 + 1] = xi;
965 j1 -= m;
966 k1 -= m;
967 xr = a[j1];
968 xi = a[j1 + 1];
969 yr = a[k1];
970 yi = a[k1 + 1];
971 a[j1] = yr;
972 a[j1 + 1] = yi;
973 a[k1] = xr;
974 a[k1 + 1] = xi;
975 for (i = nh >> 1; i > (k ^= i); i >>= 1);
976 }
977 k1 = j0 + k0;
978 j1 = k1 + 2;
979 k1 += nh;
980 xr = a[j1];
981 xi = a[j1 + 1];
982 yr = a[k1];
983 yi = a[k1 + 1];
984 a[j1] = yr;
985 a[j1 + 1] = yi;
986 a[k1] = xr;
987 a[k1 + 1] = xi;
988 j1 += m;
989 k1 += m;
990 xr = a[j1];
991 xi = a[j1 + 1];
992 yr = a[k1];
993 yi = a[k1 + 1];
994 a[j1] = yr;
995 a[j1 + 1] = yi;
996 a[k1] = xr;
997 a[k1 + 1] = xi;
998 for (i = nh >> 1; i > (j0 ^= i); i >>= 1);
999 }
1000 }
1001 }
1002
1003 __inline void bitrv2conj(int n, float* a) {
1004 int j0, k0, j1, k1, l, m, i, j, k, nh;
1005 float xr, xi, yr, yi;
1006
1007 m = 4;
1008 for (l = n >> 2; l > 8; l >>= 2) {
1009 m <<= 1;
1010 }
1011 nh = n >> 1;
1012 if (l == 8) {
1013 j0 = 0;
1014 for (k0 = 0; k0 < m; k0 += 4) {
1015 k = k0;
1016 for (j = j0; j < j0 + k0; j += 4) {
1017 xr = a[j];
1018 xi = -a[j + 1];
1019 yr = a[k];
1020 yi = -a[k + 1];
1021 a[j] = yr;
1022 a[j + 1] = yi;
1023 a[k] = xr;
1024 a[k + 1] = xi;
1025 j1 = j + m;
1026 k1 = k + 2 * m;
1027 xr = a[j1];
1028 xi = -a[j1 + 1];
1029 yr = a[k1];
1030 yi = -a[k1 + 1];
1031 a[j1] = yr;
1032 a[j1 + 1] = yi;
1033 a[k1] = xr;
1034 a[k1 + 1] = xi;
1035 j1 += m;
1036 k1 -= m;
1037 xr = a[j1];
1038 xi = -a[j1 + 1];
1039 yr = a[k1];
1040 yi = -a[k1 + 1];
1041 a[j1] = yr;
1042 a[j1 + 1] = yi;
1043 a[k1] = xr;
1044 a[k1 + 1] = xi;
1045 j1 += m;
1046 k1 += 2 * m;
1047 xr = a[j1];
1048 xi = -a[j1 + 1];
1049 yr = a[k1];
1050 yi = -a[k1 + 1];
1051 a[j1] = yr;
1052 a[j1 + 1] = yi;
1053 a[k1] = xr;
1054 a[k1 + 1] = xi;
1055 j1 += nh;
1056 k1 += 2;
1057 xr = a[j1];
1058 xi = -a[j1 + 1];
1059 yr = a[k1];
1060 yi = -a[k1 + 1];
1061 a[j1] = yr;
1062 a[j1 + 1] = yi;
1063 a[k1] = xr;
1064 a[k1 + 1] = xi;
1065 j1 -= m;
1066 k1 -= 2 * m;
1067 xr = a[j1];
1068 xi = -a[j1 + 1];
1069 yr = a[k1];
1070 yi = -a[k1 + 1];
1071 a[j1] = yr;
1072 a[j1 + 1] = yi;
1073 a[k1] = xr;
1074 a[k1 + 1] = xi;
1075 j1 -= m;
1076 k1 += m;
1077 xr = a[j1];
1078 xi = -a[j1 + 1];
1079 yr = a[k1];
1080 yi = -a[k1 + 1];
1081 a[j1] = yr;
1082 a[j1 + 1] = yi;
1083 a[k1] = xr;
1084 a[k1 + 1] = xi;
1085 j1 -= m;
1086 k1 -= 2 * m;
1087 xr = a[j1];
1088 xi = -a[j1 + 1];
1089 yr = a[k1];
1090 yi = -a[k1 + 1];
1091 a[j1] = yr;
1092 a[j1 + 1] = yi;
1093 a[k1] = xr;
1094 a[k1 + 1] = xi;
1095 j1 += 2;
1096 k1 += nh;
1097 xr = a[j1];
1098 xi = -a[j1 + 1];
1099 yr = a[k1];
1100 yi = -a[k1 + 1];
1101 a[j1] = yr;
1102 a[j1 + 1] = yi;
1103 a[k1] = xr;
1104 a[k1 + 1] = xi;
1105 j1 += m;
1106 k1 += 2 * m;
1107 xr = a[j1];
1108 xi = -a[j1 + 1];
1109 yr = a[k1];
1110 yi = -a[k1 + 1];
1111 a[j1] = yr;
1112 a[j1 + 1] = yi;
1113 a[k1] = xr;
1114 a[k1 + 1] = xi;
1115 j1 += m;
1116 k1 -= m;
1117 xr = a[j1];
1118 xi = -a[j1 + 1];
1119 yr = a[k1];
1120 yi = -a[k1 + 1];
1121 a[j1] = yr;
1122 a[j1 + 1] = yi;
1123 a[k1] = xr;
1124 a[k1 + 1] = xi;
1125 j1 += m;
1126 k1 += 2 * m;
1127 xr = a[j1];
1128 xi = -a[j1 + 1];
1129 yr = a[k1];
1130 yi = -a[k1 + 1];
1131 a[j1] = yr;
1132 a[j1 + 1] = yi;
1133 a[k1] = xr;
1134 a[k1 + 1] = xi;
1135 j1 -= nh;
1136 k1 -= 2;
1137 xr = a[j1];
1138 xi = -a[j1 + 1];
1139 yr = a[k1];
1140 yi = -a[k1 + 1];
1141 a[j1] = yr;
1142 a[j1 + 1] = yi;
1143 a[k1] = xr;
1144 a[k1 + 1] = xi;
1145 j1 -= m;
1146 k1 -= 2 * m;
1147 xr = a[j1];
1148 xi = -a[j1 + 1];
1149 yr = a[k1];
1150 yi = -a[k1 + 1];
1151 a[j1] = yr;
1152 a[j1 + 1] = yi;
1153 a[k1] = xr;
1154 a[k1 + 1] = xi;
1155 j1 -= m;
1156 k1 += m;
1157 xr = a[j1];
1158 xi = -a[j1 + 1];
1159 yr = a[k1];
1160 yi = -a[k1 + 1];
1161 a[j1] = yr;
1162 a[j1 + 1] = yi;
1163 a[k1] = xr;
1164 a[k1 + 1] = xi;
1165 j1 -= m;
1166 k1 -= 2 * m;
1167 xr = a[j1];
1168 xi = -a[j1 + 1];
1169 yr = a[k1];
1170 yi = -a[k1 + 1];
1171 a[j1] = yr;
1172 a[j1 + 1] = yi;
1173 a[k1] = xr;
1174 a[k1 + 1] = xi;
1175 for (i = nh >> 1; i > (k ^= i); i >>= 1);
1176 }
1177 k1 = j0 + k0;
1178 j1 = k1 + 2;
1179 k1 += nh;
1180 a[j1 - 1] = -a[j1 - 1];
1181 xr = a[j1];
1182 xi = -a[j1 + 1];
1183 yr = a[k1];
1184 yi = -a[k1 + 1];
1185 a[j1] = yr;
1186 a[j1 + 1] = yi;
1187 a[k1] = xr;
1188 a[k1 + 1] = xi;
1189 a[k1 + 3] = -a[k1 + 3];
1190 j1 += m;
1191 k1 += 2 * m;
1192 xr = a[j1];
1193 xi = -a[j1 + 1];
1194 yr = a[k1];
1195 yi = -a[k1 + 1];
1196 a[j1] = yr;
1197 a[j1 + 1] = yi;
1198 a[k1] = xr;
1199 a[k1 + 1] = xi;
1200 j1 += m;
1201 k1 -= m;
1202 xr = a[j1];
1203 xi = -a[j1 + 1];
1204 yr = a[k1];
1205 yi = -a[k1 + 1];
1206 a[j1] = yr;
1207 a[j1 + 1] = yi;
1208 a[k1] = xr;
1209 a[k1 + 1] = xi;
1210 j1 -= 2;
1211 k1 -= nh;
1212 xr = a[j1];
1213 xi = -a[j1 + 1];
1214 yr = a[k1];
1215 yi = -a[k1 + 1];
1216 a[j1] = yr;
1217 a[j1 + 1] = yi;
1218 a[k1] = xr;
1219 a[k1 + 1] = xi;
1220 j1 += nh + 2;
1221 k1 += nh + 2;
1222 xr = a[j1];
1223 xi = -a[j1 + 1];
1224 yr = a[k1];
1225 yi = -a[k1 + 1];
1226 a[j1] = yr;
1227 a[j1 + 1] = yi;
1228 a[k1] = xr;
1229 a[k1 + 1] = xi;
1230 j1 -= nh - m;
1231 k1 += 2 * m - 2;
1232 a[j1 - 1] = -a[j1 - 1];
1233 xr = a[j1];
1234 xi = -a[j1 + 1];
1235 yr = a[k1];
1236 yi = -a[k1 + 1];
1237 a[j1] = yr;
1238 a[j1 + 1] = yi;
1239 a[k1] = xr;
1240 a[k1 + 1] = xi;
1241 a[k1 + 3] = -a[k1 + 3];
1242 for (i = nh >> 1; i > (j0 ^= i); i >>= 1);
1243 }
1244 } else {
1245 j0 = 0;
1246 for (k0 = 0; k0 < m; k0 += 4) {
1247 k = k0;
1248 for (j = j0; j < j0 + k0; j += 4) {
1249 xr = a[j];
1250 xi = -a[j + 1];
1251 yr = a[k];
1252 yi = -a[k + 1];
1253 a[j] = yr;
1254 a[j + 1] = yi;
1255 a[k] = xr;
1256 a[k + 1] = xi;
1257 j1 = j + m;
1258 k1 = k + m;
1259 xr = a[j1];
1260 xi = -a[j1 + 1];
1261 yr = a[k1];
1262 yi = -a[k1 + 1];
1263 a[j1] = yr;
1264 a[j1 + 1] = yi;
1265 a[k1] = xr;
1266 a[k1 + 1] = xi;
1267 j1 += nh;
1268 k1 += 2;
1269 xr = a[j1];
1270 xi = -a[j1 + 1];
1271 yr = a[k1];
1272 yi = -a[k1 + 1];
1273 a[j1] = yr;
1274 a[j1 + 1] = yi;
1275 a[k1] = xr;
1276 a[k1 + 1] = xi;
1277 j1 -= m;
1278 k1 -= m;
1279 xr = a[j1];
1280 xi = -a[j1 + 1];
1281 yr = a[k1];
1282 yi = -a[k1 + 1];
1283 a[j1] = yr;
1284 a[j1 + 1] = yi;
1285 a[k1] = xr;
1286 a[k1 + 1] = xi;
1287 j1 += 2;
1288 k1 += nh;
1289 xr = a[j1];
1290 xi = -a[j1 + 1];
1291 yr = a[k1];
1292 yi = -a[k1 + 1];
1293 a[j1] = yr;
1294 a[j1 + 1] = yi;
1295 a[k1] = xr;
1296 a[k1 + 1] = xi;
1297 j1 += m;
1298 k1 += m;
1299 xr = a[j1];
1300 xi = -a[j1 + 1];
1301 yr = a[k1];
1302 yi = -a[k1 + 1];
1303 a[j1] = yr;
1304 a[j1 + 1] = yi;
1305 a[k1] = xr;
1306 a[k1 + 1] = xi;
1307 j1 -= nh;
1308 k1 -= 2;
1309 xr = a[j1];
1310 xi = -a[j1 + 1];
1311 yr = a[k1];
1312 yi = -a[k1 + 1];
1313 a[j1] = yr;
1314 a[j1 + 1] = yi;
1315 a[k1] = xr;
1316 a[k1 + 1] = xi;
1317 j1 -= m;
1318 k1 -= m;
1319 xr = a[j1];
1320 xi = -a[j1 + 1];
1321 yr = a[k1];
1322 yi = -a[k1 + 1];
1323 a[j1] = yr;
1324 a[j1 + 1] = yi;
1325 a[k1] = xr;
1326 a[k1 + 1] = xi;
1327 for (i = nh >> 1; i > (k ^= i); i >>= 1);
1328 }
1329 k1 = j0 + k0;
1330 j1 = k1 + 2;
1331 k1 += nh;
1332 a[j1 - 1] = -a[j1 - 1];
1333 xr = a[j1];
1334 xi = -a[j1 + 1];
1335 yr = a[k1];
1336 yi = -a[k1 + 1];
1337 a[j1] = yr;
1338 a[j1 + 1] = yi;
1339 a[k1] = xr;
1340 a[k1 + 1] = xi;
1341 a[k1 + 3] = -a[k1 + 3];
1342 j1 += m;
1343 k1 += m;
1344 a[j1 - 1] = -a[j1 - 1];
1345 xr = a[j1];
1346 xi = -a[j1 + 1];
1347 yr = a[k1];
1348 yi = -a[k1 + 1];
1349 a[j1] = yr;
1350 a[j1 + 1] = yi;
1351 a[k1] = xr;
1352 a[k1 + 1] = xi;
1353 a[k1 + 3] = -a[k1 + 3];
1354 for (i = nh >> 1; i > (j0 ^= i); i >>= 1);
1355 }
1356 }
1357 }
1358
1359 __inline void bitrv216(float* a) {
1360 float x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, x5r, x5i, x7r, x7i, x8r, x8i,
1361 x10r, x10i, x11r, x11i, x12r, x12i, x13r, x13i, x14r, x14i;
1362
1363 x1r = a[2];
1364 x1i = a[3];
1365 x2r = a[4];
1366 x2i = a[5];
1367 x3r = a[6];
1368 x3i = a[7];
1369 x4r = a[8];
1370 x4i = a[9];
1371 x5r = a[10];
1372 x5i = a[11];
1373 x7r = a[14];
1374 x7i = a[15];
1375 x8r = a[16];
1376 x8i = a[17];
1377 x10r = a[20];
1378 x10i = a[21];
1379 x11r = a[22];
1380 x11i = a[23];
1381 x12r = a[24];
1382 x12i = a[25];
1383 x13r = a[26];
1384 x13i = a[27];
1385 x14r = a[28];
1386 x14i = a[29];
1387 a[2] = x8r;
1388 a[3] = x8i;
1389 a[4] = x4r;
1390 a[5] = x4i;
1391 a[6] = x12r;
1392 a[7] = x12i;
1393 a[8] = x2r;
1394 a[9] = x2i;
1395 a[10] = x10r;
1396 a[11] = x10i;
1397 a[14] = x14r;
1398 a[15] = x14i;
1399 a[16] = x1r;
1400 a[17] = x1i;
1401 a[20] = x5r;
1402 a[21] = x5i;
1403 a[22] = x13r;
1404 a[23] = x13i;
1405 a[24] = x3r;
1406 a[25] = x3i;
1407 a[26] = x11r;
1408 a[27] = x11i;
1409 a[28] = x7r;
1410 a[29] = x7i;
1411 }
1412
1413 __inline void bitrv216neg(float* a) {
1414 float x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, x5r, x5i, x6r, x6i, x7r, x7i,
1415 x8r, x8i, x9r, x9i, x10r, x10i, x11r, x11i, x12r, x12i, x13r, x13i,
1416 x14r, x14i, x15r, x15i;
1417
1418 x1r = a[2];
1419 x1i = a[3];
1420 x2r = a[4];
1421 x2i = a[5];
1422 x3r = a[6];
1423 x3i = a[7];
1424 x4r = a[8];
1425 x4i = a[9];
1426 x5r = a[10];
1427 x5i = a[11];
1428 x6r = a[12];
1429 x6i = a[13];
1430 x7r = a[14];
1431 x7i = a[15];
1432 x8r = a[16];
1433 x8i = a[17];
1434 x9r = a[18];
1435 x9i = a[19];
1436 x10r = a[20];
1437 x10i = a[21];
1438 x11r = a[22];
1439 x11i = a[23];
1440 x12r = a[24];
1441 x12i = a[25];
1442 x13r = a[26];
1443 x13i = a[27];
1444 x14r = a[28];
1445 x14i = a[29];
1446 x15r = a[30];
1447 x15i = a[31];
1448 a[2] = x15r;
1449 a[3] = x15i;
1450 a[4] = x7r;
1451 a[5] = x7i;
1452 a[6] = x11r;
1453 a[7] = x11i;
1454 a[8] = x3r;
1455 a[9] = x3i;
1456 a[10] = x13r;
1457 a[11] = x13i;
1458 a[12] = x5r;
1459 a[13] = x5i;
1460 a[14] = x9r;
1461 a[15] = x9i;
1462 a[16] = x1r;
1463 a[17] = x1i;
1464 a[18] = x14r;
1465 a[19] = x14i;
1466 a[20] = x6r;
1467 a[21] = x6i;
1468 a[22] = x10r;
1469 a[23] = x10i;
1470 a[24] = x2r;
1471 a[25] = x2i;
1472 a[26] = x12r;
1473 a[27] = x12i;
1474 a[28] = x4r;
1475 a[29] = x4i;
1476 a[30] = x8r;
1477 a[31] = x8i;
1478 }
1479
1480 __inline void bitrv208(float* a) {
1481 float x1r, x1i, x3r, x3i, x4r, x4i, x6r, x6i;
1482
1483 x1r = a[2];
1484 x1i = a[3];
1485 x3r = a[6];
1486 x3i = a[7];
1487 x4r = a[8];
1488 x4i = a[9];
1489 x6r = a[12];
1490 x6i = a[13];
1491 a[2] = x4r;
1492 a[3] = x4i;
1493 a[6] = x6r;
1494 a[7] = x6i;
1495 a[8] = x1r;
1496 a[9] = x1i;
1497 a[12] = x3r;
1498 a[13] = x3i;
1499 }
1500
1501 __inline void bitrv208neg(float* a) {
1502 float x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, x5r, x5i, x6r, x6i, x7r, x7i;
1503
1504 x1r = a[2];
1505 x1i = a[3];
1506 x2r = a[4];
1507 x2i = a[5];
1508 x3r = a[6];
1509 x3i = a[7];
1510 x4r = a[8];
1511 x4i = a[9];
1512 x5r = a[10];
1513 x5i = a[11];
1514 x6r = a[12];
1515 x6i = a[13];
1516 x7r = a[14];
1517 x7i = a[15];
1518 a[2] = x7r;
1519 a[3] = x7i;
1520 a[4] = x3r;
1521 a[5] = x3i;
1522 a[6] = x5r;
1523 a[7] = x5i;
1524 a[8] = x1r;
1525 a[9] = x1i;
1526 a[10] = x6r;
1527 a[11] = x6i;
1528 a[12] = x2r;
1529 a[13] = x2i;
1530 a[14] = x4r;
1531 a[15] = x4i;
1532 }
1533
1534 __inline void bitrv1(int n, float* a) {
1535 int j0, k0, j1, k1, l, m, i, j, k, nh;
1536 float x;
1537
1538 nh = n >> 1;
1539 x = a[1];
1540 a[1] = a[nh];
1541 a[nh] = x;
1542 m = 2;
1543 for (l = n >> 2; l > 2; l >>= 2) {
1544 m <<= 1;
1545 }
1546 if (l == 2) {
1547 j1 = m + 1;
1548 k1 = m + nh;
1549 x = a[j1];
1550 a[j1] = a[k1];
1551 a[k1] = x;
1552 j0 = 0;
1553 for (k0 = 2; k0 < m; k0 += 2) {
1554 for (i = nh >> 1; i > (j0 ^= i); i >>= 1);
1555 k = k0;
1556 for (j = j0; j < j0 + k0; j += 2) {
1557 x = a[j];
1558 a[j] = a[k];
1559 a[k] = x;
1560 j1 = j + m;
1561 k1 = k + m;
1562 x = a[j1];
1563 a[j1] = a[k1];
1564 a[k1] = x;
1565 j1 += nh;
1566 k1++;
1567 x = a[j1];
1568 a[j1] = a[k1];
1569 a[k1] = x;
1570 j1 -= m;
1571 k1 -= m;
1572 x = a[j1];
1573 a[j1] = a[k1];
1574 a[k1] = x;
1575 j1++;
1576 k1 += nh;
1577 x = a[j1];
1578 a[j1] = a[k1];
1579 a[k1] = x;
1580 j1 += m;
1581 k1 += m;
1582 x = a[j1];
1583 a[j1] = a[k1];
1584 a[k1] = x;
1585 j1 -= nh;
1586 k1--;
1587 x = a[j1];
1588 a[j1] = a[k1];
1589 a[k1] = x;
1590 j1 -= m;
1591 k1 -= m;
1592 x = a[j1];
1593 a[j1] = a[k1];
1594 a[k1] = x;
1595 for (i = nh >> 1; i > (k ^= i); i >>= 1);
1596 }
1597 k1 = j0 + k0;
1598 j1 = k1 + 1;
1599 k1 += nh;
1600 x = a[j1];
1601 a[j1] = a[k1];
1602 a[k1] = x;
1603 j1 += m;
1604 k1 += m;
1605 x = a[j1];
1606 a[j1] = a[k1];
1607 a[k1] = x;
1608 }
1609 } else {
1610 j0 = 0;
1611 for (k0 = 2; k0 < m; k0 += 2) {
1612 for (i = nh >> 1; i > (j0 ^= i); i >>= 1);
1613 k = k0;
1614 for (j = j0; j < j0 + k0; j += 2) {
1615 x = a[j];
1616 a[j] = a[k];
1617 a[k] = x;
1618 j1 = j + nh;
1619 k1 = k + 1;
1620 x = a[j1];
1621 a[j1] = a[k1];
1622 a[k1] = x;
1623 j1++;
1624 k1 += nh;
1625 x = a[j1];
1626 a[j1] = a[k1];
1627 a[k1] = x;
1628 j1 -= nh;
1629 k1--;
1630 x = a[j1];
1631 a[j1] = a[k1];
1632 a[k1] = x;
1633 for (i = nh >> 1; i > (k ^= i); i >>= 1);
1634 }
1635 k1 = j0 + k0;
1636 j1 = k1 + 1;
1637 k1 += nh;
1638 x = a[j1];
1639 a[j1] = a[k1];
1640 a[k1] = x;
1641 }
1642 }
1643 }
1644
1645 __inline void cftb1st(int n, float* a) {
1646 int i, i0, j, j0, j1, j2, j3, m, mh;
1647 float ew, w1r, w1i, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i, ss1,
1648 ss3;
1649 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1650
1651 mh = n >> 3;
1652 m = 2 * mh;
1653 j1 = m;
1654 j2 = j1 + m;
1655 j3 = j2 + m;
1656 x0r = a[0] + a[j2];
1657 x0i = -a[1] - a[j2 + 1];
1658 x1r = a[0] - a[j2];
1659 x1i = -a[1] + a[j2 + 1];
1660 x2r = a[j1] + a[j3];
1661 x2i = a[j1 + 1] + a[j3 + 1];
1662 x3r = a[j1] - a[j3];
1663 x3i = a[j1 + 1] - a[j3 + 1];
1664 a[0] = x0r + x2r;
1665 a[1] = x0i - x2i;
1666 a[j1] = x0r - x2r;
1667 a[j1 + 1] = x0i + x2i;
1668 a[j2] = x1r + x3i;
1669 a[j2 + 1] = x1i + x3r;
1670 a[j3] = x1r - x3i;
1671 a[j3 + 1] = x1i - x3r;
1672 wd1r = 1;
1673 wd1i = 0;
1674 wd3r = 1;
1675 wd3i = 0;
1676 ew = M_PI_2 / m;
1677 w1r = cos(2 * ew);
1678 w1i = sin(2 * ew);
1679 wk1r = w1r;
1680 wk1i = w1i;
1681 ss1 = 2 * w1i;
1682 wk3i = 2 * ss1 * wk1r;
1683 wk3r = wk1r - wk3i * wk1i;
1684 wk3i = wk1i - wk3i * wk1r;
1685 ss3 = 2 * wk3i;
1686 i = 0;
1687 for (;;) {
1688 i0 = i + 4 * CDFT_LOOP_DIV;
1689 if (i0 > mh - 4) {
1690 i0 = mh - 4;
1691 }
1692 for (j = i + 2; j < i0; j += 4) {
1693 wd1r -= ss1 * wk1i;
1694 wd1i += ss1 * wk1r;
1695 wd3r -= ss3 * wk3i;
1696 wd3i += ss3 * wk3r;
1697 j1 = j + m;
1698 j2 = j1 + m;
1699 j3 = j2 + m;
1700 x0r = a[j] + a[j2];
1701 x0i = -a[j + 1] - a[j2 + 1];
1702 x1r = a[j] - a[j2];
1703 x1i = -a[j + 1] + a[j2 + 1];
1704 x2r = a[j1] + a[j3];
1705 x2i = a[j1 + 1] + a[j3 + 1];
1706 x3r = a[j1] - a[j3];
1707 x3i = a[j1 + 1] - a[j3 + 1];
1708 a[j] = x0r + x2r;
1709 a[j + 1] = x0i - x2i;
1710 a[j1] = x0r - x2r;
1711 a[j1 + 1] = x0i + x2i;
1712 x0r = x1r + x3i;
1713 x0i = x1i + x3r;
1714 a[j2] = wk1r * x0r - wk1i * x0i;
1715 a[j2 + 1] = wk1r * x0i + wk1i * x0r;
1716 x0r = x1r - x3i;
1717 x0i = x1i - x3r;
1718 a[j3] = wk3r * x0r + wk3i * x0i;
1719 a[j3 + 1] = wk3r * x0i - wk3i * x0r;
1720 x0r = a[j + 2] + a[j2 + 2];
1721 x0i = -a[j + 3] - a[j2 + 3];
1722 x1r = a[j + 2] - a[j2 + 2];
1723 x1i = -a[j + 3] + a[j2 + 3];
1724 x2r = a[j1 + 2] + a[j3 + 2];
1725 x2i = a[j1 + 3] + a[j3 + 3];
1726 x3r = a[j1 + 2] - a[j3 + 2];
1727 x3i = a[j1 + 3] - a[j3 + 3];
1728 a[j + 2] = x0r + x2r;
1729 a[j + 3] = x0i - x2i;
1730 a[j1 + 2] = x0r - x2r;
1731 a[j1 + 3] = x0i + x2i;
1732 x0r = x1r + x3i;
1733 x0i = x1i + x3r;
1734 a[j2 + 2] = wd1r * x0r - wd1i * x0i;
1735 a[j2 + 3] = wd1r * x0i + wd1i * x0r;
1736 x0r = x1r - x3i;
1737 x0i = x1i - x3r;
1738 a[j3 + 2] = wd3r * x0r + wd3i * x0i;
1739 a[j3 + 3] = wd3r * x0i - wd3i * x0r;
1740 j0 = m - j;
1741 j1 = j0 + m;
1742 j2 = j1 + m;
1743 j3 = j2 + m;
1744 x0r = a[j0] + a[j2];
1745 x0i = -a[j0 + 1] - a[j2 + 1];
1746 x1r = a[j0] - a[j2];
1747 x1i = -a[j0 + 1] + a[j2 + 1];
1748 x2r = a[j1] + a[j3];
1749 x2i = a[j1 + 1] + a[j3 + 1];
1750 x3r = a[j1] - a[j3];
1751 x3i = a[j1 + 1] - a[j3 + 1];
1752 a[j0] = x0r + x2r;
1753 a[j0 + 1] = x0i - x2i;
1754 a[j1] = x0r - x2r;
1755 a[j1 + 1] = x0i + x2i;
1756 x0r = x1r + x3i;
1757 x0i = x1i + x3r;
1758 a[j2] = wk1i * x0r - wk1r * x0i;
1759 a[j2 + 1] = wk1i * x0i + wk1r * x0r;
1760 x0r = x1r - x3i;
1761 x0i = x1i - x3r;
1762 a[j3] = wk3i * x0r + wk3r * x0i;
1763 a[j3 + 1] = wk3i * x0i - wk3r * x0r;
1764 x0r = a[j0 - 2] + a[j2 - 2];
1765 x0i = -a[j0 - 1] - a[j2 - 1];
1766 x1r = a[j0 - 2] - a[j2 - 2];
1767 x1i = -a[j0 - 1] + a[j2 - 1];
1768 x2r = a[j1 - 2] + a[j3 - 2];
1769 x2i = a[j1 - 1] + a[j3 - 1];
1770 x3r = a[j1 - 2] - a[j3 - 2];
1771 x3i = a[j1 - 1] - a[j3 - 1];
1772 a[j0 - 2] = x0r + x2r;
1773 a[j0 - 1] = x0i - x2i;
1774 a[j1 - 2] = x0r - x2r;
1775 a[j1 - 1] = x0i + x2i;
1776 x0r = x1r + x3i;
1777 x0i = x1i + x3r;
1778 a[j2 - 2] = wd1i * x0r - wd1r * x0i;
1779 a[j2 - 1] = wd1i * x0i + wd1r * x0r;
1780 x0r = x1r - x3i;
1781 x0i = x1i - x3r;
1782 a[j3 - 2] = wd3i * x0r + wd3r * x0i;
1783 a[j3 - 1] = wd3i * x0i - wd3r * x0r;
1784 wk1r -= ss1 * wd1i;
1785 wk1i += ss1 * wd1r;
1786 wk3r -= ss3 * wd3i;
1787 wk3i += ss3 * wd3r;
1788 }
1789 if (i0 == mh - 4) {
1790 break;
1791 }
1792 wd1r = cos(ew * i0);
1793 wd1i = sin(ew * i0);
1794 wd3i = 4 * wd1i * wd1r;
1795 wd3r = wd1r - wd3i * wd1i;
1796 wd3i = wd1i - wd3i * wd1r;
1797 wk1r = w1r * wd1r - w1i * wd1i;
1798 wk1i = w1r * wd1i + w1i * wd1r;
1799 wk3i = 4 * wk1i * wk1r;
1800 wk3r = wk1r - wk3i * wk1i;
1801 wk3i = wk1i - wk3i * wk1r;
1802 i = i0;
1803 }
1804 wd1r = WR5000;
1805 j0 = mh;
1806 j1 = j0 + m;
1807 j2 = j1 + m;
1808 j3 = j2 + m;
1809 x0r = a[j0 - 2] + a[j2 - 2];
1810 x0i = -a[j0 - 1] - a[j2 - 1];
1811 x1r = a[j0 - 2] - a[j2 - 2];
1812 x1i = -a[j0 - 1] + a[j2 - 1];
1813 x2r = a[j1 - 2] + a[j3 - 2];
1814 x2i = a[j1 - 1] + a[j3 - 1];
1815 x3r = a[j1 - 2] - a[j3 - 2];
1816 x3i = a[j1 - 1] - a[j3 - 1];
1817 a[j0 - 2] = x0r + x2r;
1818 a[j0 - 1] = x0i - x2i;
1819 a[j1 - 2] = x0r - x2r;
1820 a[j1 - 1] = x0i + x2i;
1821 x0r = x1r + x3i;
1822 x0i = x1i + x3r;
1823 a[j2 - 2] = wk1r * x0r - wk1i * x0i;
1824 a[j2 - 1] = wk1r * x0i + wk1i * x0r;
1825 x0r = x1r - x3i;
1826 x0i = x1i - x3r;
1827 a[j3 - 2] = wk3r * x0r + wk3i * x0i;
1828 a[j3 - 1] = wk3r * x0i - wk3i * x0r;
1829 x0r = a[j0] + a[j2];
1830 x0i = -a[j0 + 1] - a[j2 + 1];
1831 x1r = a[j0] - a[j2];
1832 x1i = -a[j0 + 1] + a[j2 + 1];
1833 x2r = a[j1] + a[j3];
1834 x2i = a[j1 + 1] + a[j3 + 1];
1835 x3r = a[j1] - a[j3];
1836 x3i = a[j1 + 1] - a[j3 + 1];
1837 a[j0] = x0r + x2r;
1838 a[j0 + 1] = x0i - x2i;
1839 a[j1] = x0r - x2r;
1840 a[j1 + 1] = x0i + x2i;
1841 x0r = x1r + x3i;
1842 x0i = x1i + x3r;
1843 a[j2] = wd1r * (x0r - x0i);
1844 a[j2 + 1] = wd1r * (x0i + x0r);
1845 x0r = x1r - x3i;
1846 x0i = x1i - x3r;
1847 a[j3] = -wd1r * (x0r + x0i);
1848 a[j3 + 1] = -wd1r * (x0i - x0r);
1849 x0r = a[j0 + 2] + a[j2 + 2];
1850 x0i = -a[j0 + 3] - a[j2 + 3];
1851 x1r = a[j0 + 2] - a[j2 + 2];
1852 x1i = -a[j0 + 3] + a[j2 + 3];
1853 x2r = a[j1 + 2] + a[j3 + 2];
1854 x2i = a[j1 + 3] + a[j3 + 3];
1855 x3r = a[j1 + 2] - a[j3 + 2];
1856 x3i = a[j1 + 3] - a[j3 + 3];
1857 a[j0 + 2] = x0r + x2r;
1858 a[j0 + 3] = x0i - x2i;
1859 a[j1 + 2] = x0r - x2r;
1860 a[j1 + 3] = x0i + x2i;
1861 x0r = x1r + x3i;
1862 x0i = x1i + x3r;
1863 a[j2 + 2] = wk1i * x0r - wk1r * x0i;
1864 a[j2 + 3] = wk1i * x0i + wk1r * x0r;
1865 x0r = x1r - x3i;
1866 x0i = x1i - x3r;
1867 a[j3 + 2] = wk3i * x0r + wk3r * x0i;
1868 a[j3 + 3] = wk3i * x0i - wk3r * x0r;
1869 }
1870
1871#ifdef USE_CDFT_THREADS
1872 struct cdft_arg_st {
1873 int n0;
1874 int n;
1875 float* a;
1876 };
1877 typedef struct cdft_arg_st cdft_arg_t;
1878
1879 __inline void cftrec4_th(int n, float* a) {
1880 void* cftrec1_th(void* p);
1881 void* cftrec2_th(void* p);
1882 int i, idiv4, m, nthread;
1883 cdft_thread_t th[4];
1884 cdft_arg_t ag[4];
1885
1886 nthread = 2;
1887 idiv4 = 0;
1888 m = n >> 1;
1889 if (n > CDFT_4THREADS_BEGIN_N) {
1890 nthread = 4;
1891 idiv4 = 1;
1892 m >>= 1;
1893 }
1894 for (i = 0; i < nthread; i++) {
1895 ag[i].n0 = n;
1896 ag[i].n = m;
1897 ag[i].a = &a[i * m];
1898 if (i != idiv4) {
1899 cdft_thread_create(&th[i], cftrec1_th, &ag[i]);
1900 } else {
1901 cdft_thread_create(&th[i], cftrec2_th, &ag[i]);
1902 }
1903 }
1904 for (i = 0; i < nthread; i++) {
1905 cdft_thread_wait(th[i]);
1906 }
1907 }
1908
1909 __inline void* cftrec1_th(void* p) {
1910 int cfttree(int n, int j, int k, float* a);
1911 void cftleaf(int n, int isplt, float* a);
1912 void cftmdl1(int n, float* a);
1913 int isplt, j, k, m, n, n0;
1914 float* a;
1915
1916 n0 = ((cdft_arg_t*)p)->n0;
1917 n = ((cdft_arg_t*)p)->n;
1918 a = ((cdft_arg_t*)p)->a;
1919 m = n0;
1920 while (m > 512) {
1921 m >>= 2;
1922 cftmdl1(m, &a[n - m]);
1923 }
1924 cftleaf(m, 1, &a[n - m]);
1925 k = 0;
1926 for (j = n - m; j > 0; j -= m) {
1927 k++;
1928 isplt = cfttree(m, j, k, a);
1929 cftleaf(m, isplt, &a[j - m]);
1930 }
1931 return (void*)0;
1932 }
1933
1934 __inline void* cftrec2_th(void* p) {
1935 int cfttree(int n, int j, int k, float* a);
1936 void cftleaf(int n, int isplt, float* a);
1937 void cftmdl2(int n, float* a);
1938 int isplt, j, k, m, n, n0;
1939 float* a;
1940
1941 n0 = ((cdft_arg_t*)p)->n0;
1942 n = ((cdft_arg_t*)p)->n;
1943 a = ((cdft_arg_t*)p)->a;
1944 k = 1;
1945 m = n0;
1946 while (m > 512) {
1947 m >>= 2;
1948 k <<= 2;
1949 cftmdl2(m, &a[n - m]);
1950 }
1951 cftleaf(m, 0, &a[n - m]);
1952 k >>= 1;
1953 for (j = n - m; j > 0; j -= m) {
1954 k++;
1955 isplt = cfttree(m, j, k, a);
1956 cftleaf(m, isplt, &a[j - m]);
1957 }
1958 return (void*)0;
1959 }
1960#endif /* USE_CDFT_THREADS */
1961
1962 __inline void cftrec4(int n, float* a) {
1963 /*int cfttree(int n, int j, int k, float *a);
1964 void cftleaf(int n, int isplt, float *a);
1965 void cftmdl1(int n, float *a);*/
1966 int isplt, j, k, m;
1967
1968 m = n;
1969 while (m > 512) {
1970 m >>= 2;
1971 cftmdl1(m, &a[n - m]);
1972 }
1973 cftleaf(m, 1, &a[n - m]);
1974 k = 0;
1975 for (j = n - m; j > 0; j -= m) {
1976 k++;
1977 isplt = cfttree(m, j, k, a);
1978 cftleaf(m, isplt, &a[j - m]);
1979 }
1980 }
1981
1982 __inline int cfttree(int n, int j, int k, float* a) {
1983 /*void cftmdl1(int n, float *a);
1984 void cftmdl2(int n, float *a);*/
1985 int i, isplt, m;
1986
1987 if ((k & 3) != 0) {
1988 isplt = k & 1;
1989 if (isplt != 0) {
1990 cftmdl1(n, &a[j - n]);
1991 } else {
1992 cftmdl2(n, &a[j - n]);
1993 }
1994 } else {
1995 m = n;
1996 for (i = k; (i & 3) == 0; i >>= 2) {
1997 m <<= 2;
1998 }
1999 isplt = i & 1;
2000 if (isplt != 0) {
2001 while (m > 128) {
2002 cftmdl1(m, &a[j - m]);
2003 m >>= 2;
2004 }
2005 } else {
2006 while (m > 128) {
2007 cftmdl2(m, &a[j - m]);
2008 m >>= 2;
2009 }
2010 }
2011 }
2012 return isplt;
2013 }
2014
2015 __inline void cftleaf(int n, int isplt, float* a) {
2016 /*void cftmdl1(int n, float *a);
2017 void cftmdl2(int n, float *a);
2018 void cftf161(float *a);
2019 void cftf162(float *a);
2020 void cftf081(float *a);
2021 void cftf082(float *a);*/
2022
2023 if (n == 512) {
2024 cftmdl1(128, a);
2025 cftf161(a);
2026 cftf162(&a[32]);
2027 cftf161(&a[64]);
2028 cftf161(&a[96]);
2029 cftmdl2(128, &a[128]);
2030 cftf161(&a[128]);
2031 cftf162(&a[160]);
2032 cftf161(&a[192]);
2033 cftf162(&a[224]);
2034 cftmdl1(128, &a[256]);
2035 cftf161(&a[256]);
2036 cftf162(&a[288]);
2037 cftf161(&a[320]);
2038 cftf161(&a[352]);
2039 if (isplt != 0) {
2040 cftmdl1(128, &a[384]);
2041 cftf161(&a[480]);
2042 } else {
2043 cftmdl2(128, &a[384]);
2044 cftf162(&a[480]);
2045 }
2046 cftf161(&a[384]);
2047 cftf162(&a[416]);
2048 cftf161(&a[448]);
2049 } else {
2050 cftmdl1(64, a);
2051 cftf081(a);
2052 cftf082(&a[16]);
2053 cftf081(&a[32]);
2054 cftf081(&a[48]);
2055 cftmdl2(64, &a[64]);
2056 cftf081(&a[64]);
2057 cftf082(&a[80]);
2058 cftf081(&a[96]);
2059 cftf082(&a[112]);
2060 cftmdl1(64, &a[128]);
2061 cftf081(&a[128]);
2062 cftf082(&a[144]);
2063 cftf081(&a[160]);
2064 cftf081(&a[176]);
2065 if (isplt != 0) {
2066 cftmdl1(64, &a[192]);
2067 cftf081(&a[240]);
2068 } else {
2069 cftmdl2(64, &a[192]);
2070 cftf082(&a[240]);
2071 }
2072 cftf081(&a[192]);
2073 cftf082(&a[208]);
2074 cftf081(&a[224]);
2075 }
2076 }
2077
2078 __inline void cftmdl1(int n, float* a) {
2079 int i, i0, j, j0, j1, j2, j3, m, mh;
2080 float ew, w1r, w1i, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i, ss1,
2081 ss3;
2082 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
2083
2084 mh = n >> 3;
2085 m = 2 * mh;
2086 j1 = m;
2087 j2 = j1 + m;
2088 j3 = j2 + m;
2089 x0r = a[0] + a[j2];
2090 x0i = a[1] + a[j2 + 1];
2091 x1r = a[0] - a[j2];
2092 x1i = a[1] - a[j2 + 1];
2093 x2r = a[j1] + a[j3];
2094 x2i = a[j1 + 1] + a[j3 + 1];
2095 x3r = a[j1] - a[j3];
2096 x3i = a[j1 + 1] - a[j3 + 1];
2097 a[0] = x0r + x2r;
2098 a[1] = x0i + x2i;
2099 a[j1] = x0r - x2r;
2100 a[j1 + 1] = x0i - x2i;
2101 a[j2] = x1r - x3i;
2102 a[j2 + 1] = x1i + x3r;
2103 a[j3] = x1r + x3i;
2104 a[j3 + 1] = x1i - x3r;
2105 wd1r = 1;
2106 wd1i = 0;
2107 wd3r = 1;
2108 wd3i = 0;
2109 ew = M_PI_2 / m;
2110 w1r = cos(2 * ew);
2111 w1i = sin(2 * ew);
2112 wk1r = w1r;
2113 wk1i = w1i;
2114 ss1 = 2 * w1i;
2115 wk3i = 2 * ss1 * wk1r;
2116 wk3r = wk1r - wk3i * wk1i;
2117 wk3i = wk1i - wk3i * wk1r;
2118 ss3 = 2 * wk3i;
2119 i = 0;
2120 for (;;) {
2121 i0 = i + 4 * CDFT_LOOP_DIV;
2122 if (i0 > mh - 4) {
2123 i0 = mh - 4;
2124 }
2125 for (j = i + 2; j < i0; j += 4) {
2126 wd1r -= ss1 * wk1i;
2127 wd1i += ss1 * wk1r;
2128 wd3r -= ss3 * wk3i;
2129 wd3i += ss3 * wk3r;
2130 j1 = j + m;
2131 j2 = j1 + m;
2132 j3 = j2 + m;
2133 x0r = a[j] + a[j2];
2134 x0i = a[j + 1] + a[j2 + 1];
2135 x1r = a[j] - a[j2];
2136 x1i = a[j + 1] - a[j2 + 1];
2137 x2r = a[j1] + a[j3];
2138 x2i = a[j1 + 1] + a[j3 + 1];
2139 x3r = a[j1] - a[j3];
2140 x3i = a[j1 + 1] - a[j3 + 1];
2141 a[j] = x0r + x2r;
2142 a[j + 1] = x0i + x2i;
2143 a[j1] = x0r - x2r;
2144 a[j1 + 1] = x0i - x2i;
2145 x0r = x1r - x3i;
2146 x0i = x1i + x3r;
2147 a[j2] = wk1r * x0r - wk1i * x0i;
2148 a[j2 + 1] = wk1r * x0i + wk1i * x0r;
2149 x0r = x1r + x3i;
2150 x0i = x1i - x3r;
2151 a[j3] = wk3r * x0r + wk3i * x0i;
2152 a[j3 + 1] = wk3r * x0i - wk3i * x0r;
2153 x0r = a[j + 2] + a[j2 + 2];
2154 x0i = a[j + 3] + a[j2 + 3];
2155 x1r = a[j + 2] - a[j2 + 2];
2156 x1i = a[j + 3] - a[j2 + 3];
2157 x2r = a[j1 + 2] + a[j3 + 2];
2158 x2i = a[j1 + 3] + a[j3 + 3];
2159 x3r = a[j1 + 2] - a[j3 + 2];
2160 x3i = a[j1 + 3] - a[j3 + 3];
2161 a[j + 2] = x0r + x2r;
2162 a[j + 3] = x0i + x2i;
2163 a[j1 + 2] = x0r - x2r;
2164 a[j1 + 3] = x0i - x2i;
2165 x0r = x1r - x3i;
2166 x0i = x1i + x3r;
2167 a[j2 + 2] = wd1r * x0r - wd1i * x0i;
2168 a[j2 + 3] = wd1r * x0i + wd1i * x0r;
2169 x0r = x1r + x3i;
2170 x0i = x1i - x3r;
2171 a[j3 + 2] = wd3r * x0r + wd3i * x0i;
2172 a[j3 + 3] = wd3r * x0i - wd3i * x0r;
2173 j0 = m - j;
2174 j1 = j0 + m;
2175 j2 = j1 + m;
2176 j3 = j2 + m;
2177 x0r = a[j0] + a[j2];
2178 x0i = a[j0 + 1] + a[j2 + 1];
2179 x1r = a[j0] - a[j2];
2180 x1i = a[j0 + 1] - a[j2 + 1];
2181 x2r = a[j1] + a[j3];
2182 x2i = a[j1 + 1] + a[j3 + 1];
2183 x3r = a[j1] - a[j3];
2184 x3i = a[j1 + 1] - a[j3 + 1];
2185 a[j0] = x0r + x2r;
2186 a[j0 + 1] = x0i + x2i;
2187 a[j1] = x0r - x2r;
2188 a[j1 + 1] = x0i - x2i;
2189 x0r = x1r - x3i;
2190 x0i = x1i + x3r;
2191 a[j2] = wk1i * x0r - wk1r * x0i;
2192 a[j2 + 1] = wk1i * x0i + wk1r * x0r;
2193 x0r = x1r + x3i;
2194 x0i = x1i - x3r;
2195 a[j3] = wk3i * x0r + wk3r * x0i;
2196 a[j3 + 1] = wk3i * x0i - wk3r * x0r;
2197 x0r = a[j0 - 2] + a[j2 - 2];
2198 x0i = a[j0 - 1] + a[j2 - 1];
2199 x1r = a[j0 - 2] - a[j2 - 2];
2200 x1i = a[j0 - 1] - a[j2 - 1];
2201 x2r = a[j1 - 2] + a[j3 - 2];
2202 x2i = a[j1 - 1] + a[j3 - 1];
2203 x3r = a[j1 - 2] - a[j3 - 2];
2204 x3i = a[j1 - 1] - a[j3 - 1];
2205 a[j0 - 2] = x0r + x2r;
2206 a[j0 - 1] = x0i + x2i;
2207 a[j1 - 2] = x0r - x2r;
2208 a[j1 - 1] = x0i - x2i;
2209 x0r = x1r - x3i;
2210 x0i = x1i + x3r;
2211 a[j2 - 2] = wd1i * x0r - wd1r * x0i;
2212 a[j2 - 1] = wd1i * x0i + wd1r * x0r;
2213 x0r = x1r + x3i;
2214 x0i = x1i - x3r;
2215 a[j3 - 2] = wd3i * x0r + wd3r * x0i;
2216 a[j3 - 1] = wd3i * x0i - wd3r * x0r;
2217 wk1r -= ss1 * wd1i;
2218 wk1i += ss1 * wd1r;
2219 wk3r -= ss3 * wd3i;
2220 wk3i += ss3 * wd3r;
2221 }
2222 if (i0 == mh - 4) {
2223 break;
2224 }
2225 wd1r = cos(ew * i0);
2226 wd1i = sin(ew * i0);
2227 wd3i = 4 * wd1i * wd1r;
2228 wd3r = wd1r - wd3i * wd1i;
2229 wd3i = wd1i - wd3i * wd1r;
2230 wk1r = w1r * wd1r - w1i * wd1i;
2231 wk1i = w1r * wd1i + w1i * wd1r;
2232 wk3i = 4 * wk1i * wk1r;
2233 wk3r = wk1r - wk3i * wk1i;
2234 wk3i = wk1i - wk3i * wk1r;
2235 i = i0;
2236 }
2237 wd1r = WR5000;
2238 j0 = mh;
2239 j1 = j0 + m;
2240 j2 = j1 + m;
2241 j3 = j2 + m;
2242 x0r = a[j0 - 2] + a[j2 - 2];
2243 x0i = a[j0 - 1] + a[j2 - 1];
2244 x1r = a[j0 - 2] - a[j2 - 2];
2245 x1i = a[j0 - 1] - a[j2 - 1];
2246 x2r = a[j1 - 2] + a[j3 - 2];
2247 x2i = a[j1 - 1] + a[j3 - 1];
2248 x3r = a[j1 - 2] - a[j3 - 2];
2249 x3i = a[j1 - 1] - a[j3 - 1];
2250 a[j0 - 2] = x0r + x2r;
2251 a[j0 - 1] = x0i + x2i;
2252 a[j1 - 2] = x0r - x2r;
2253 a[j1 - 1] = x0i - x2i;
2254 x0r = x1r - x3i;
2255 x0i = x1i + x3r;
2256 a[j2 - 2] = wk1r * x0r - wk1i * x0i;
2257 a[j2 - 1] = wk1r * x0i + wk1i * x0r;
2258 x0r = x1r + x3i;
2259 x0i = x1i - x3r;
2260 a[j3 - 2] = wk3r * x0r + wk3i * x0i;
2261 a[j3 - 1] = wk3r * x0i - wk3i * x0r;
2262 x0r = a[j0] + a[j2];
2263 x0i = a[j0 + 1] + a[j2 + 1];
2264 x1r = a[j0] - a[j2];
2265 x1i = a[j0 + 1] - a[j2 + 1];
2266 x2r = a[j1] + a[j3];
2267 x2i = a[j1 + 1] + a[j3 + 1];
2268 x3r = a[j1] - a[j3];
2269 x3i = a[j1 + 1] - a[j3 + 1];
2270 a[j0] = x0r + x2r;
2271 a[j0 + 1] = x0i + x2i;
2272 a[j1] = x0r - x2r;
2273 a[j1 + 1] = x0i - x2i;
2274 x0r = x1r - x3i;
2275 x0i = x1i + x3r;
2276 a[j2] = wd1r * (x0r - x0i);
2277 a[j2 + 1] = wd1r * (x0i + x0r);
2278 x0r = x1r + x3i;
2279 x0i = x1i - x3r;
2280 a[j3] = -wd1r * (x0r + x0i);
2281 a[j3 + 1] = -wd1r * (x0i - x0r);
2282 x0r = a[j0 + 2] + a[j2 + 2];
2283 x0i = a[j0 + 3] + a[j2 + 3];
2284 x1r = a[j0 + 2] - a[j2 + 2];
2285 x1i = a[j0 + 3] - a[j2 + 3];
2286 x2r = a[j1 + 2] + a[j3 + 2];
2287 x2i = a[j1 + 3] + a[j3 + 3];
2288 x3r = a[j1 + 2] - a[j3 + 2];
2289 x3i = a[j1 + 3] - a[j3 + 3];
2290 a[j0 + 2] = x0r + x2r;
2291 a[j0 + 3] = x0i + x2i;
2292 a[j1 + 2] = x0r - x2r;
2293 a[j1 + 3] = x0i - x2i;
2294 x0r = x1r - x3i;
2295 x0i = x1i + x3r;
2296 a[j2 + 2] = wk1i * x0r - wk1r * x0i;
2297 a[j2 + 3] = wk1i * x0i + wk1r * x0r;
2298 x0r = x1r + x3i;
2299 x0i = x1i - x3r;
2300 a[j3 + 2] = wk3i * x0r + wk3r * x0i;
2301 a[j3 + 3] = wk3i * x0i - wk3r * x0r;
2302 }
2303
2304 __inline void cftmdl2(int n, float* a) {
2305 int i, i0, j, j0, j1, j2, j3, m, mh;
2306 float ew, w1r, w1i, wn4r, wk1r, wk1i, wk3r, wk3i, wl1r, wl1i, wl3r, wl3i,
2307 wd1r, wd1i, wd3r, wd3i, we1r, we1i, we3r, we3i, ss1, ss3;
2308 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y2r, y2i;
2309
2310 mh = n >> 3;
2311 m = 2 * mh;
2312 wn4r = WR5000;
2313 j1 = m;
2314 j2 = j1 + m;
2315 j3 = j2 + m;
2316 x0r = a[0] - a[j2 + 1];
2317 x0i = a[1] + a[j2];
2318 x1r = a[0] + a[j2 + 1];
2319 x1i = a[1] - a[j2];
2320 x2r = a[j1] - a[j3 + 1];
2321 x2i = a[j1 + 1] + a[j3];
2322 x3r = a[j1] + a[j3 + 1];
2323 x3i = a[j1 + 1] - a[j3];
2324 y0r = wn4r * (x2r - x2i);
2325 y0i = wn4r * (x2i + x2r);
2326 a[0] = x0r + y0r;
2327 a[1] = x0i + y0i;
2328 a[j1] = x0r - y0r;
2329 a[j1 + 1] = x0i - y0i;
2330 y0r = wn4r * (x3r - x3i);
2331 y0i = wn4r * (x3i + x3r);
2332 a[j2] = x1r - y0i;
2333 a[j2 + 1] = x1i + y0r;
2334 a[j3] = x1r + y0i;
2335 a[j3 + 1] = x1i - y0r;
2336 wl1r = 1;
2337 wl1i = 0;
2338 wl3r = 1;
2339 wl3i = 0;
2340 we1r = wn4r;
2341 we1i = wn4r;
2342 we3r = -wn4r;
2343 we3i = -wn4r;
2344 ew = M_PI_2 / (2 * m);
2345 w1r = cos(2 * ew);
2346 w1i = sin(2 * ew);
2347 wk1r = w1r;
2348 wk1i = w1i;
2349 wd1r = wn4r * (w1r - w1i);
2350 wd1i = wn4r * (w1i + w1r);
2351 ss1 = 2 * w1i;
2352 wk3i = 2 * ss1 * wk1r;
2353 wk3r = wk1r - wk3i * wk1i;
2354 wk3i = wk1i - wk3i * wk1r;
2355 ss3 = 2 * wk3i;
2356 wd3r = -wn4r * (wk3r - wk3i);
2357 wd3i = -wn4r * (wk3i + wk3r);
2358 i = 0;
2359 for (;;) {
2360 i0 = i + 4 * CDFT_LOOP_DIV;
2361 if (i0 > mh - 4) {
2362 i0 = mh - 4;
2363 }
2364 for (j = i + 2; j < i0; j += 4) {
2365 wl1r -= ss1 * wk1i;
2366 wl1i += ss1 * wk1r;
2367 wl3r -= ss3 * wk3i;
2368 wl3i += ss3 * wk3r;
2369 we1r -= ss1 * wd1i;
2370 we1i += ss1 * wd1r;
2371 we3r -= ss3 * wd3i;
2372 we3i += ss3 * wd3r;
2373 j1 = j + m;
2374 j2 = j1 + m;
2375 j3 = j2 + m;
2376 x0r = a[j] - a[j2 + 1];
2377 x0i = a[j + 1] + a[j2];
2378 x1r = a[j] + a[j2 + 1];
2379 x1i = a[j + 1] - a[j2];
2380 x2r = a[j1] - a[j3 + 1];
2381 x2i = a[j1 + 1] + a[j3];
2382 x3r = a[j1] + a[j3 + 1];
2383 x3i = a[j1 + 1] - a[j3];
2384 y0r = wk1r * x0r - wk1i * x0i;
2385 y0i = wk1r * x0i + wk1i * x0r;
2386 y2r = wd1r * x2r - wd1i * x2i;
2387 y2i = wd1r * x2i + wd1i * x2r;
2388 a[j] = y0r + y2r;
2389 a[j + 1] = y0i + y2i;
2390 a[j1] = y0r - y2r;
2391 a[j1 + 1] = y0i - y2i;
2392 y0r = wk3r * x1r + wk3i * x1i;
2393 y0i = wk3r * x1i - wk3i * x1r;
2394 y2r = wd3r * x3r + wd3i * x3i;
2395 y2i = wd3r * x3i - wd3i * x3r;
2396 a[j2] = y0r + y2r;
2397 a[j2 + 1] = y0i + y2i;
2398 a[j3] = y0r - y2r;
2399 a[j3 + 1] = y0i - y2i;
2400 x0r = a[j + 2] - a[j2 + 3];
2401 x0i = a[j + 3] + a[j2 + 2];
2402 x1r = a[j + 2] + a[j2 + 3];
2403 x1i = a[j + 3] - a[j2 + 2];
2404 x2r = a[j1 + 2] - a[j3 + 3];
2405 x2i = a[j1 + 3] + a[j3 + 2];
2406 x3r = a[j1 + 2] + a[j3 + 3];
2407 x3i = a[j1 + 3] - a[j3 + 2];
2408 y0r = wl1r * x0r - wl1i * x0i;
2409 y0i = wl1r * x0i + wl1i * x0r;
2410 y2r = we1r * x2r - we1i * x2i;
2411 y2i = we1r * x2i + we1i * x2r;
2412 a[j + 2] = y0r + y2r;
2413 a[j + 3] = y0i + y2i;
2414 a[j1 + 2] = y0r - y2r;
2415 a[j1 + 3] = y0i - y2i;
2416 y0r = wl3r * x1r + wl3i * x1i;
2417 y0i = wl3r * x1i - wl3i * x1r;
2418 y2r = we3r * x3r + we3i * x3i;
2419 y2i = we3r * x3i - we3i * x3r;
2420 a[j2 + 2] = y0r + y2r;
2421 a[j2 + 3] = y0i + y2i;
2422 a[j3 + 2] = y0r - y2r;
2423 a[j3 + 3] = y0i - y2i;
2424 j0 = m - j;
2425 j1 = j0 + m;
2426 j2 = j1 + m;
2427 j3 = j2 + m;
2428 x0r = a[j0] - a[j2 + 1];
2429 x0i = a[j0 + 1] + a[j2];
2430 x1r = a[j0] + a[j2 + 1];
2431 x1i = a[j0 + 1] - a[j2];
2432 x2r = a[j1] - a[j3 + 1];
2433 x2i = a[j1 + 1] + a[j3];
2434 x3r = a[j1] + a[j3 + 1];
2435 x3i = a[j1 + 1] - a[j3];
2436 y0r = wd1i * x0r - wd1r * x0i;
2437 y0i = wd1i * x0i + wd1r * x0r;
2438 y2r = wk1i * x2r - wk1r * x2i;
2439 y2i = wk1i * x2i + wk1r * x2r;
2440 a[j0] = y0r + y2r;
2441 a[j0 + 1] = y0i + y2i;
2442 a[j1] = y0r - y2r;
2443 a[j1 + 1] = y0i - y2i;
2444 y0r = wd3i * x1r + wd3r * x1i;
2445 y0i = wd3i * x1i - wd3r * x1r;
2446 y2r = wk3i * x3r + wk3r * x3i;
2447 y2i = wk3i * x3i - wk3r * x3r;
2448 a[j2] = y0r + y2r;
2449 a[j2 + 1] = y0i + y2i;
2450 a[j3] = y0r - y2r;
2451 a[j3 + 1] = y0i - y2i;
2452 x0r = a[j0 - 2] - a[j2 - 1];
2453 x0i = a[j0 - 1] + a[j2 - 2];
2454 x1r = a[j0 - 2] + a[j2 - 1];
2455 x1i = a[j0 - 1] - a[j2 - 2];
2456 x2r = a[j1 - 2] - a[j3 - 1];
2457 x2i = a[j1 - 1] + a[j3 - 2];
2458 x3r = a[j1 - 2] + a[j3 - 1];
2459 x3i = a[j1 - 1] - a[j3 - 2];
2460 y0r = we1i * x0r - we1r * x0i;
2461 y0i = we1i * x0i + we1r * x0r;
2462 y2r = wl1i * x2r - wl1r * x2i;
2463 y2i = wl1i * x2i + wl1r * x2r;
2464 a[j0 - 2] = y0r + y2r;
2465 a[j0 - 1] = y0i + y2i;
2466 a[j1 - 2] = y0r - y2r;
2467 a[j1 - 1] = y0i - y2i;
2468 y0r = we3i * x1r + we3r * x1i;
2469 y0i = we3i * x1i - we3r * x1r;
2470 y2r = wl3i * x3r + wl3r * x3i;
2471 y2i = wl3i * x3i - wl3r * x3r;
2472 a[j2 - 2] = y0r + y2r;
2473 a[j2 - 1] = y0i + y2i;
2474 a[j3 - 2] = y0r - y2r;
2475 a[j3 - 1] = y0i - y2i;
2476 wk1r -= ss1 * wl1i;
2477 wk1i += ss1 * wl1r;
2478 wk3r -= ss3 * wl3i;
2479 wk3i += ss3 * wl3r;
2480 wd1r -= ss1 * we1i;
2481 wd1i += ss1 * we1r;
2482 wd3r -= ss3 * we3i;
2483 wd3i += ss3 * we3r;
2484 }
2485 if (i0 == mh - 4) {
2486 break;
2487 }
2488 wl1r = cos(ew * i0);
2489 wl1i = sin(ew * i0);
2490 wl3i = 4 * wl1i * wl1r;
2491 wl3r = wl1r - wl3i * wl1i;
2492 wl3i = wl1i - wl3i * wl1r;
2493 we1r = wn4r * (wl1r - wl1i);
2494 we1i = wn4r * (wl1i + wl1r);
2495 we3r = -wn4r * (wl3r - wl3i);
2496 we3i = -wn4r * (wl3i + wl3r);
2497 wk1r = w1r * wl1r - w1i * wl1i;
2498 wk1i = w1r * wl1i + w1i * wl1r;
2499 wk3i = 4 * wk1i * wk1r;
2500 wk3r = wk1r - wk3i * wk1i;
2501 wk3i = wk1i - wk3i * wk1r;
2502 wd1r = wn4r * (wk1r - wk1i);
2503 wd1i = wn4r * (wk1i + wk1r);
2504 wd3r = -wn4r * (wk3r - wk3i);
2505 wd3i = -wn4r * (wk3i + wk3r);
2506 i = i0;
2507 }
2508 wl1r = WR2500;
2509 wl1i = WI2500;
2510 j0 = mh;
2511 j1 = j0 + m;
2512 j2 = j1 + m;
2513 j3 = j2 + m;
2514 x0r = a[j0 - 2] - a[j2 - 1];
2515 x0i = a[j0 - 1] + a[j2 - 2];
2516 x1r = a[j0 - 2] + a[j2 - 1];
2517 x1i = a[j0 - 1] - a[j2 - 2];
2518 x2r = a[j1 - 2] - a[j3 - 1];
2519 x2i = a[j1 - 1] + a[j3 - 2];
2520 x3r = a[j1 - 2] + a[j3 - 1];
2521 x3i = a[j1 - 1] - a[j3 - 2];
2522 y0r = wk1r * x0r - wk1i * x0i;
2523 y0i = wk1r * x0i + wk1i * x0r;
2524 y2r = wd1r * x2r - wd1i * x2i;
2525 y2i = wd1r * x2i + wd1i * x2r;
2526 a[j0 - 2] = y0r + y2r;
2527 a[j0 - 1] = y0i + y2i;
2528 a[j1 - 2] = y0r - y2r;
2529 a[j1 - 1] = y0i - y2i;
2530 y0r = wk3r * x1r + wk3i * x1i;
2531 y0i = wk3r * x1i - wk3i * x1r;
2532 y2r = wd3r * x3r + wd3i * x3i;
2533 y2i = wd3r * x3i - wd3i * x3r;
2534 a[j2 - 2] = y0r + y2r;
2535 a[j2 - 1] = y0i + y2i;
2536 a[j3 - 2] = y0r - y2r;
2537 a[j3 - 1] = y0i - y2i;
2538 x0r = a[j0] - a[j2 + 1];
2539 x0i = a[j0 + 1] + a[j2];
2540 x1r = a[j0] + a[j2 + 1];
2541 x1i = a[j0 + 1] - a[j2];
2542 x2r = a[j1] - a[j3 + 1];
2543 x2i = a[j1 + 1] + a[j3];
2544 x3r = a[j1] + a[j3 + 1];
2545 x3i = a[j1 + 1] - a[j3];
2546 y0r = wl1r * x0r - wl1i * x0i;
2547 y0i = wl1r * x0i + wl1i * x0r;
2548 y2r = wl1i * x2r - wl1r * x2i;
2549 y2i = wl1i * x2i + wl1r * x2r;
2550 a[j0] = y0r + y2r;
2551 a[j0 + 1] = y0i + y2i;
2552 a[j1] = y0r - y2r;
2553 a[j1 + 1] = y0i - y2i;
2554 y0r = wl1i * x1r - wl1r * x1i;
2555 y0i = wl1i * x1i + wl1r * x1r;
2556 y2r = wl1r * x3r - wl1i * x3i;
2557 y2i = wl1r * x3i + wl1i * x3r;
2558 a[j2] = y0r - y2r;
2559 a[j2 + 1] = y0i - y2i;
2560 a[j3] = y0r + y2r;
2561 a[j3 + 1] = y0i + y2i;
2562 x0r = a[j0 + 2] - a[j2 + 3];
2563 x0i = a[j0 + 3] + a[j2 + 2];
2564 x1r = a[j0 + 2] + a[j2 + 3];
2565 x1i = a[j0 + 3] - a[j2 + 2];
2566 x2r = a[j1 + 2] - a[j3 + 3];
2567 x2i = a[j1 + 3] + a[j3 + 2];
2568 x3r = a[j1 + 2] + a[j3 + 3];
2569 x3i = a[j1 + 3] - a[j3 + 2];
2570 y0r = wd1i * x0r - wd1r * x0i;
2571 y0i = wd1i * x0i + wd1r * x0r;
2572 y2r = wk1i * x2r - wk1r * x2i;
2573 y2i = wk1i * x2i + wk1r * x2r;
2574 a[j0 + 2] = y0r + y2r;
2575 a[j0 + 3] = y0i + y2i;
2576 a[j1 + 2] = y0r - y2r;
2577 a[j1 + 3] = y0i - y2i;
2578 y0r = wd3i * x1r + wd3r * x1i;
2579 y0i = wd3i * x1i - wd3r * x1r;
2580 y2r = wk3i * x3r + wk3r * x3i;
2581 y2i = wk3i * x3i - wk3r * x3r;
2582 a[j2 + 2] = y0r + y2r;
2583 a[j2 + 3] = y0i + y2i;
2584 a[j3 + 2] = y0r - y2r;
2585 a[j3 + 3] = y0i - y2i;
2586 }
2587
2588 __inline void cftfx41(int n, float* a) {
2589 /*void cftf161(float *a);
2590 void cftf162(float *a);
2591 void cftf081(float *a);
2592 void cftf082(float *a);*/
2593
2594 if (n == 128) {
2595 cftf161(a);
2596 cftf162(&a[32]);
2597 cftf161(&a[64]);
2598 cftf161(&a[96]);
2599 } else {
2600 cftf081(a);
2601 cftf082(&a[16]);
2602 cftf081(&a[32]);
2603 cftf081(&a[48]);
2604 }
2605 }
2606
2607 __inline void cftf161(float* a) {
2608 float wn4r, wk1r, wk1i, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i,
2609 y1r, y1i, y2r, y2i, y3r, y3i, y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i,
2610 y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i, y12r, y12i, y13r, y13i,
2611 y14r, y14i, y15r, y15i;
2612
2613 wn4r = WR5000;
2614 wk1r = WR2500;
2615 wk1i = WI2500;
2616 x0r = a[0] + a[16];
2617 x0i = a[1] + a[17];
2618 x1r = a[0] - a[16];
2619 x1i = a[1] - a[17];
2620 x2r = a[8] + a[24];
2621 x2i = a[9] + a[25];
2622 x3r = a[8] - a[24];
2623 x3i = a[9] - a[25];
2624 y0r = x0r + x2r;
2625 y0i = x0i + x2i;
2626 y4r = x0r - x2r;
2627 y4i = x0i - x2i;
2628 y8r = x1r - x3i;
2629 y8i = x1i + x3r;
2630 y12r = x1r + x3i;
2631 y12i = x1i - x3r;
2632 x0r = a[2] + a[18];
2633 x0i = a[3] + a[19];
2634 x1r = a[2] - a[18];
2635 x1i = a[3] - a[19];
2636 x2r = a[10] + a[26];
2637 x2i = a[11] + a[27];
2638 x3r = a[10] - a[26];
2639 x3i = a[11] - a[27];
2640 y1r = x0r + x2r;
2641 y1i = x0i + x2i;
2642 y5r = x0r - x2r;
2643 y5i = x0i - x2i;
2644 x0r = x1r - x3i;
2645 x0i = x1i + x3r;
2646 y9r = wk1r * x0r - wk1i * x0i;
2647 y9i = wk1r * x0i + wk1i * x0r;
2648 x0r = x1r + x3i;
2649 x0i = x1i - x3r;
2650 y13r = wk1i * x0r - wk1r * x0i;
2651 y13i = wk1i * x0i + wk1r * x0r;
2652 x0r = a[4] + a[20];
2653 x0i = a[5] + a[21];
2654 x1r = a[4] - a[20];
2655 x1i = a[5] - a[21];
2656 x2r = a[12] + a[28];
2657 x2i = a[13] + a[29];
2658 x3r = a[12] - a[28];
2659 x3i = a[13] - a[29];
2660 y2r = x0r + x2r;
2661 y2i = x0i + x2i;
2662 y6r = x0r - x2r;
2663 y6i = x0i - x2i;
2664 x0r = x1r - x3i;
2665 x0i = x1i + x3r;
2666 y10r = wn4r * (x0r - x0i);
2667 y10i = wn4r * (x0i + x0r);
2668 x0r = x1r + x3i;
2669 x0i = x1i - x3r;
2670 y14r = wn4r * (x0r + x0i);
2671 y14i = wn4r * (x0i - x0r);
2672 x0r = a[6] + a[22];
2673 x0i = a[7] + a[23];
2674 x1r = a[6] - a[22];
2675 x1i = a[7] - a[23];
2676 x2r = a[14] + a[30];
2677 x2i = a[15] + a[31];
2678 x3r = a[14] - a[30];
2679 x3i = a[15] - a[31];
2680 y3r = x0r + x2r;
2681 y3i = x0i + x2i;
2682 y7r = x0r - x2r;
2683 y7i = x0i - x2i;
2684 x0r = x1r - x3i;
2685 x0i = x1i + x3r;
2686 y11r = wk1i * x0r - wk1r * x0i;
2687 y11i = wk1i * x0i + wk1r * x0r;
2688 x0r = x1r + x3i;
2689 x0i = x1i - x3r;
2690 y15r = wk1r * x0r - wk1i * x0i;
2691 y15i = wk1r * x0i + wk1i * x0r;
2692 x0r = y12r - y14r;
2693 x0i = y12i - y14i;
2694 x1r = y12r + y14r;
2695 x1i = y12i + y14i;
2696 x2r = y13r - y15r;
2697 x2i = y13i - y15i;
2698 x3r = y13r + y15r;
2699 x3i = y13i + y15i;
2700 a[24] = x0r + x2r;
2701 a[25] = x0i + x2i;
2702 a[26] = x0r - x2r;
2703 a[27] = x0i - x2i;
2704 a[28] = x1r - x3i;
2705 a[29] = x1i + x3r;
2706 a[30] = x1r + x3i;
2707 a[31] = x1i - x3r;
2708 x0r = y8r + y10r;
2709 x0i = y8i + y10i;
2710 x1r = y8r - y10r;
2711 x1i = y8i - y10i;
2712 x2r = y9r + y11r;
2713 x2i = y9i + y11i;
2714 x3r = y9r - y11r;
2715 x3i = y9i - y11i;
2716 a[16] = x0r + x2r;
2717 a[17] = x0i + x2i;
2718 a[18] = x0r - x2r;
2719 a[19] = x0i - x2i;
2720 a[20] = x1r - x3i;
2721 a[21] = x1i + x3r;
2722 a[22] = x1r + x3i;
2723 a[23] = x1i - x3r;
2724 x0r = y5r - y7i;
2725 x0i = y5i + y7r;
2726 x2r = wn4r * (x0r - x0i);
2727 x2i = wn4r * (x0i + x0r);
2728 x0r = y5r + y7i;
2729 x0i = y5i - y7r;
2730 x3r = wn4r * (x0r - x0i);
2731 x3i = wn4r * (x0i + x0r);
2732 x0r = y4r - y6i;
2733 x0i = y4i + y6r;
2734 x1r = y4r + y6i;
2735 x1i = y4i - y6r;
2736 a[8] = x0r + x2r;
2737 a[9] = x0i + x2i;
2738 a[10] = x0r - x2r;
2739 a[11] = x0i - x2i;
2740 a[12] = x1r - x3i;
2741 a[13] = x1i + x3r;
2742 a[14] = x1r + x3i;
2743 a[15] = x1i - x3r;
2744 x0r = y0r + y2r;
2745 x0i = y0i + y2i;
2746 x1r = y0r - y2r;
2747 x1i = y0i - y2i;
2748 x2r = y1r + y3r;
2749 x2i = y1i + y3i;
2750 x3r = y1r - y3r;
2751 x3i = y1i - y3i;
2752 a[0] = x0r + x2r;
2753 a[1] = x0i + x2i;
2754 a[2] = x0r - x2r;
2755 a[3] = x0i - x2i;
2756 a[4] = x1r - x3i;
2757 a[5] = x1i + x3r;
2758 a[6] = x1r + x3i;
2759 a[7] = x1i - x3r;
2760 }
2761
2762 __inline void cftf162(float* a) {
2763 float wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i, x0r, x0i, x1r, x1i, x2r,
2764 x2i, y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, y4r, y4i, y5r, y5i, y6r,
2765 y6i, y7r, y7i, y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i, y12r, y12i,
2766 y13r, y13i, y14r, y14i, y15r, y15i;
2767
2768 wn4r = WR5000;
2769 wk1r = WR1250;
2770 wk1i = WI1250;
2771 wk2r = WR2500;
2772 wk2i = WI2500;
2773 wk3r = WR3750;
2774 wk3i = WI3750;
2775 x1r = a[0] - a[17];
2776 x1i = a[1] + a[16];
2777 x0r = a[8] - a[25];
2778 x0i = a[9] + a[24];
2779 x2r = wn4r * (x0r - x0i);
2780 x2i = wn4r * (x0i + x0r);
2781 y0r = x1r + x2r;
2782 y0i = x1i + x2i;
2783 y4r = x1r - x2r;
2784 y4i = x1i - x2i;
2785 x1r = a[0] + a[17];
2786 x1i = a[1] - a[16];
2787 x0r = a[8] + a[25];
2788 x0i = a[9] - a[24];
2789 x2r = wn4r * (x0r - x0i);
2790 x2i = wn4r * (x0i + x0r);
2791 y8r = x1r - x2i;
2792 y8i = x1i + x2r;
2793 y12r = x1r + x2i;
2794 y12i = x1i - x2r;
2795 x0r = a[2] - a[19];
2796 x0i = a[3] + a[18];
2797 x1r = wk1r * x0r - wk1i * x0i;
2798 x1i = wk1r * x0i + wk1i * x0r;
2799 x0r = a[10] - a[27];
2800 x0i = a[11] + a[26];
2801 x2r = wk3i * x0r - wk3r * x0i;
2802 x2i = wk3i * x0i + wk3r * x0r;
2803 y1r = x1r + x2r;
2804 y1i = x1i + x2i;
2805 y5r = x1r - x2r;
2806 y5i = x1i - x2i;
2807 x0r = a[2] + a[19];
2808 x0i = a[3] - a[18];
2809 x1r = wk3r * x0r - wk3i * x0i;
2810 x1i = wk3r * x0i + wk3i * x0r;
2811 x0r = a[10] + a[27];
2812 x0i = a[11] - a[26];
2813 x2r = wk1r * x0r + wk1i * x0i;
2814 x2i = wk1r * x0i - wk1i * x0r;
2815 y9r = x1r - x2r;
2816 y9i = x1i - x2i;
2817 y13r = x1r + x2r;
2818 y13i = x1i + x2i;
2819 x0r = a[4] - a[21];
2820 x0i = a[5] + a[20];
2821 x1r = wk2r * x0r - wk2i * x0i;
2822 x1i = wk2r * x0i + wk2i * x0r;
2823 x0r = a[12] - a[29];
2824 x0i = a[13] + a[28];
2825 x2r = wk2i * x0r - wk2r * x0i;
2826 x2i = wk2i * x0i + wk2r * x0r;
2827 y2r = x1r + x2r;
2828 y2i = x1i + x2i;
2829 y6r = x1r - x2r;
2830 y6i = x1i - x2i;
2831 x0r = a[4] + a[21];
2832 x0i = a[5] - a[20];
2833 x1r = wk2i * x0r - wk2r * x0i;
2834 x1i = wk2i * x0i + wk2r * x0r;
2835 x0r = a[12] + a[29];
2836 x0i = a[13] - a[28];
2837 x2r = wk2r * x0r - wk2i * x0i;
2838 x2i = wk2r * x0i + wk2i * x0r;
2839 y10r = x1r - x2r;
2840 y10i = x1i - x2i;
2841 y14r = x1r + x2r;
2842 y14i = x1i + x2i;
2843 x0r = a[6] - a[23];
2844 x0i = a[7] + a[22];
2845 x1r = wk3r * x0r - wk3i * x0i;
2846 x1i = wk3r * x0i + wk3i * x0r;
2847 x0r = a[14] - a[31];
2848 x0i = a[15] + a[30];
2849 x2r = wk1i * x0r - wk1r * x0i;
2850 x2i = wk1i * x0i + wk1r * x0r;
2851 y3r = x1r + x2r;
2852 y3i = x1i + x2i;
2853 y7r = x1r - x2r;
2854 y7i = x1i - x2i;
2855 x0r = a[6] + a[23];
2856 x0i = a[7] - a[22];
2857 x1r = wk1i * x0r + wk1r * x0i;
2858 x1i = wk1i * x0i - wk1r * x0r;
2859 x0r = a[14] + a[31];
2860 x0i = a[15] - a[30];
2861 x2r = wk3i * x0r - wk3r * x0i;
2862 x2i = wk3i * x0i + wk3r * x0r;
2863 y11r = x1r + x2r;
2864 y11i = x1i + x2i;
2865 y15r = x1r - x2r;
2866 y15i = x1i - x2i;
2867 x1r = y0r + y2r;
2868 x1i = y0i + y2i;
2869 x2r = y1r + y3r;
2870 x2i = y1i + y3i;
2871 a[0] = x1r + x2r;
2872 a[1] = x1i + x2i;
2873 a[2] = x1r - x2r;
2874 a[3] = x1i - x2i;
2875 x1r = y0r - y2r;
2876 x1i = y0i - y2i;
2877 x2r = y1r - y3r;
2878 x2i = y1i - y3i;
2879 a[4] = x1r - x2i;
2880 a[5] = x1i + x2r;
2881 a[6] = x1r + x2i;
2882 a[7] = x1i - x2r;
2883 x1r = y4r - y6i;
2884 x1i = y4i + y6r;
2885 x0r = y5r - y7i;
2886 x0i = y5i + y7r;
2887 x2r = wn4r * (x0r - x0i);
2888 x2i = wn4r * (x0i + x0r);
2889 a[8] = x1r + x2r;
2890 a[9] = x1i + x2i;
2891 a[10] = x1r - x2r;
2892 a[11] = x1i - x2i;
2893 x1r = y4r + y6i;
2894 x1i = y4i - y6r;
2895 x0r = y5r + y7i;
2896 x0i = y5i - y7r;
2897 x2r = wn4r * (x0r - x0i);
2898 x2i = wn4r * (x0i + x0r);
2899 a[12] = x1r - x2i;
2900 a[13] = x1i + x2r;
2901 a[14] = x1r + x2i;
2902 a[15] = x1i - x2r;
2903 x1r = y8r + y10r;
2904 x1i = y8i + y10i;
2905 x2r = y9r - y11r;
2906 x2i = y9i - y11i;
2907 a[16] = x1r + x2r;
2908 a[17] = x1i + x2i;
2909 a[18] = x1r - x2r;
2910 a[19] = x1i - x2i;
2911 x1r = y8r - y10r;
2912 x1i = y8i - y10i;
2913 x2r = y9r + y11r;
2914 x2i = y9i + y11i;
2915 a[20] = x1r - x2i;
2916 a[21] = x1i + x2r;
2917 a[22] = x1r + x2i;
2918 a[23] = x1i - x2r;
2919 x1r = y12r - y14i;
2920 x1i = y12i + y14r;
2921 x0r = y13r + y15i;
2922 x0i = y13i - y15r;
2923 x2r = wn4r * (x0r - x0i);
2924 x2i = wn4r * (x0i + x0r);
2925 a[24] = x1r + x2r;
2926 a[25] = x1i + x2i;
2927 a[26] = x1r - x2r;
2928 a[27] = x1i - x2i;
2929 x1r = y12r + y14i;
2930 x1i = y12i - y14r;
2931 x0r = y13r - y15i;
2932 x0i = y13i + y15r;
2933 x2r = wn4r * (x0r - x0i);
2934 x2i = wn4r * (x0i + x0r);
2935 a[28] = x1r - x2i;
2936 a[29] = x1i + x2r;
2937 a[30] = x1r + x2i;
2938 a[31] = x1i - x2r;
2939 }
2940
2941 __inline void cftf081(float* a) {
2942 float wn4r, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y1r, y1i, y2r,
2943 y2i, y3r, y3i, y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i;
2944
2945 wn4r = WR5000;
2946 x0r = a[0] + a[8];
2947 x0i = a[1] + a[9];
2948 x1r = a[0] - a[8];
2949 x1i = a[1] - a[9];
2950 x2r = a[4] + a[12];
2951 x2i = a[5] + a[13];
2952 x3r = a[4] - a[12];
2953 x3i = a[5] - a[13];
2954 y0r = x0r + x2r;
2955 y0i = x0i + x2i;
2956 y2r = x0r - x2r;
2957 y2i = x0i - x2i;
2958 y1r = x1r - x3i;
2959 y1i = x1i + x3r;
2960 y3r = x1r + x3i;
2961 y3i = x1i - x3r;
2962 x0r = a[2] + a[10];
2963 x0i = a[3] + a[11];
2964 x1r = a[2] - a[10];
2965 x1i = a[3] - a[11];
2966 x2r = a[6] + a[14];
2967 x2i = a[7] + a[15];
2968 x3r = a[6] - a[14];
2969 x3i = a[7] - a[15];
2970 y4r = x0r + x2r;
2971 y4i = x0i + x2i;
2972 y6r = x0r - x2r;
2973 y6i = x0i - x2i;
2974 x0r = x1r - x3i;
2975 x0i = x1i + x3r;
2976 x2r = x1r + x3i;
2977 x2i = x1i - x3r;
2978 y5r = wn4r * (x0r - x0i);
2979 y5i = wn4r * (x0r + x0i);
2980 y7r = wn4r * (x2r - x2i);
2981 y7i = wn4r * (x2r + x2i);
2982 a[8] = y1r + y5r;
2983 a[9] = y1i + y5i;
2984 a[10] = y1r - y5r;
2985 a[11] = y1i - y5i;
2986 a[12] = y3r - y7i;
2987 a[13] = y3i + y7r;
2988 a[14] = y3r + y7i;
2989 a[15] = y3i - y7r;
2990 a[0] = y0r + y4r;
2991 a[1] = y0i + y4i;
2992 a[2] = y0r - y4r;
2993 a[3] = y0i - y4i;
2994 a[4] = y2r - y6i;
2995 a[5] = y2i + y6r;
2996 a[6] = y2r + y6i;
2997 a[7] = y2i - y6r;
2998 }
2999
3000 __inline void cftf082(float* a) {
3001 float wn4r, wk1r, wk1i, x0r, x0i, x1r, x1i, y0r, y0i, y1r, y1i, y2r, y2i,
3002 y3r, y3i, y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i;
3003
3004 wn4r = WR5000;
3005 wk1r = WR2500;
3006 wk1i = WI2500;
3007 y0r = a[0] - a[9];
3008 y0i = a[1] + a[8];
3009 y1r = a[0] + a[9];
3010 y1i = a[1] - a[8];
3011 x0r = a[4] - a[13];
3012 x0i = a[5] + a[12];
3013 y2r = wn4r * (x0r - x0i);
3014 y2i = wn4r * (x0i + x0r);
3015 x0r = a[4] + a[13];
3016 x0i = a[5] - a[12];
3017 y3r = wn4r * (x0r - x0i);
3018 y3i = wn4r * (x0i + x0r);
3019 x0r = a[2] - a[11];
3020 x0i = a[3] + a[10];
3021 y4r = wk1r * x0r - wk1i * x0i;
3022 y4i = wk1r * x0i + wk1i * x0r;
3023 x0r = a[2] + a[11];
3024 x0i = a[3] - a[10];
3025 y5r = wk1i * x0r - wk1r * x0i;
3026 y5i = wk1i * x0i + wk1r * x0r;
3027 x0r = a[6] - a[15];
3028 x0i = a[7] + a[14];
3029 y6r = wk1i * x0r - wk1r * x0i;
3030 y6i = wk1i * x0i + wk1r * x0r;
3031 x0r = a[6] + a[15];
3032 x0i = a[7] - a[14];
3033 y7r = wk1r * x0r - wk1i * x0i;
3034 y7i = wk1r * x0i + wk1i * x0r;
3035 x0r = y0r + y2r;
3036 x0i = y0i + y2i;
3037 x1r = y4r + y6r;
3038 x1i = y4i + y6i;
3039 a[0] = x0r + x1r;
3040 a[1] = x0i + x1i;
3041 a[2] = x0r - x1r;
3042 a[3] = x0i - x1i;
3043 x0r = y0r - y2r;
3044 x0i = y0i - y2i;
3045 x1r = y4r - y6r;
3046 x1i = y4i - y6i;
3047 a[4] = x0r - x1i;
3048 a[5] = x0i + x1r;
3049 a[6] = x0r + x1i;
3050 a[7] = x0i - x1r;
3051 x0r = y1r - y3i;
3052 x0i = y1i + y3r;
3053 x1r = y5r - y7r;
3054 x1i = y5i - y7i;
3055 a[8] = x0r + x1r;
3056 a[9] = x0i + x1i;
3057 a[10] = x0r - x1r;
3058 a[11] = x0i - x1i;
3059 x0r = y1r + y3i;
3060 x0i = y1i - y3r;
3061 x1r = y5r + y7r;
3062 x1i = y5i + y7i;
3063 a[12] = x0r - x1i;
3064 a[13] = x0i + x1r;
3065 a[14] = x0r + x1i;
3066 a[15] = x0i - x1r;
3067 }
3068
3069 __inline void cftf040(float* a) {
3070 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
3071
3072 x0r = a[0] + a[4];
3073 x0i = a[1] + a[5];
3074 x1r = a[0] - a[4];
3075 x1i = a[1] - a[5];
3076 x2r = a[2] + a[6];
3077 x2i = a[3] + a[7];
3078 x3r = a[2] - a[6];
3079 x3i = a[3] - a[7];
3080 a[0] = x0r + x2r;
3081 a[1] = x0i + x2i;
3082 a[2] = x1r - x3i;
3083 a[3] = x1i + x3r;
3084 a[4] = x0r - x2r;
3085 a[5] = x0i - x2i;
3086 a[6] = x1r + x3i;
3087 a[7] = x1i - x3r;
3088 }
3089
3090 __inline void cftb040(float* a) {
3091 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
3092
3093 x0r = a[0] + a[4];
3094 x0i = a[1] + a[5];
3095 x1r = a[0] - a[4];
3096 x1i = a[1] - a[5];
3097 x2r = a[2] + a[6];
3098 x2i = a[3] + a[7];
3099 x3r = a[2] - a[6];
3100 x3i = a[3] - a[7];
3101 a[0] = x0r + x2r;
3102 a[1] = x0i + x2i;
3103 a[2] = x1r + x3i;
3104 a[3] = x1i - x3r;
3105 a[4] = x0r - x2r;
3106 a[5] = x0i - x2i;
3107 a[6] = x1r - x3i;
3108 a[7] = x1i + x3r;
3109 }
3110
3111 __inline void cftx020(float* a) {
3112 float x0r, x0i;
3113
3114 x0r = a[0] - a[2];
3115 x0i = a[1] - a[3];
3116 a[0] += a[2];
3117 a[1] += a[3];
3118 a[2] = x0r;
3119 a[3] = x0i;
3120 }
3121
3122 __inline void rftfsub(int n, float* a) {
3123 int i, i0, j, k;
3124 float ec, w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi;
3125
3126 ec = 2 * M_PI_2 / n;
3127 wkr = 0;
3128 wki = 0;
3129 wdi = cos(ec);
3130 wdr = sin(ec);
3131 wdi *= wdr;
3132 wdr *= wdr;
3133 w1r = 1 - 2 * wdr;
3134 w1i = 2 * wdi;
3135 ss = 2 * w1i;
3136 i = n >> 1;
3137 for (;;) {
3138 i0 = i - 4 * RDFT_LOOP_DIV;
3139 if (i0 < 4) {
3140 i0 = 4;
3141 }
3142 for (j = i - 4; j >= i0; j -= 4) {
3143 k = n - j;
3144 xr = a[j + 2] - a[k - 2];
3145 xi = a[j + 3] + a[k - 1];
3146 yr = wdr * xr - wdi * xi;
3147 yi = wdr * xi + wdi * xr;
3148 a[j + 2] -= yr;
3149 a[j + 3] -= yi;
3150 a[k - 2] += yr;
3151 a[k - 1] -= yi;
3152 wkr += ss * wdi;
3153 wki += ss * (0.5 - wdr);
3154 xr = a[j] - a[k];
3155 xi = a[j + 1] + a[k + 1];
3156 yr = wkr * xr - wki * xi;
3157 yi = wkr * xi + wki * xr;
3158 a[j] -= yr;
3159 a[j + 1] -= yi;
3160 a[k] += yr;
3161 a[k + 1] -= yi;
3162 wdr += ss * wki;
3163 wdi += ss * (0.5 - wkr);
3164 }
3165 if (i0 == 4) {
3166 break;
3167 }
3168 wkr = 0.5 * sin(ec * i0);
3169 wki = 0.5 * cos(ec * i0);
3170 wdr = 0.5 - (wkr * w1r - wki * w1i);
3171 wdi = wkr * w1i + wki * w1r;
3172 wkr = 0.5 - wkr;
3173 i = i0;
3174 }
3175 xr = a[2] - a[n - 2];
3176 xi = a[3] + a[n - 1];
3177 yr = wdr * xr - wdi * xi;
3178 yi = wdr * xi + wdi * xr;
3179 a[2] -= yr;
3180 a[3] -= yi;
3181 a[n - 2] += yr;
3182 a[n - 1] -= yi;
3183 }
3184
3185 __inline void rftbsub(int n, float* a) {
3186 int i, i0, j, k;
3187 float ec, w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi;
3188
3189 ec = 2 * M_PI_2 / n;
3190 wkr = 0;
3191 wki = 0;
3192 wdi = cos(ec);
3193 wdr = sin(ec);
3194 wdi *= wdr;
3195 wdr *= wdr;
3196 w1r = 1 - 2 * wdr;
3197 w1i = 2 * wdi;
3198 ss = 2 * w1i;
3199 i = n >> 1;
3200 for (;;) {
3201 i0 = i - 4 * RDFT_LOOP_DIV;
3202 if (i0 < 4) {
3203 i0 = 4;
3204 }
3205 for (j = i - 4; j >= i0; j -= 4) {
3206 k = n - j;
3207 xr = a[j + 2] - a[k - 2];
3208 xi = a[j + 3] + a[k - 1];
3209 yr = wdr * xr + wdi * xi;
3210 yi = wdr * xi - wdi * xr;
3211 a[j + 2] -= yr;
3212 a[j + 3] -= yi;
3213 a[k - 2] += yr;
3214 a[k - 1] -= yi;
3215 wkr += ss * wdi;
3216 wki += ss * (0.5 - wdr);
3217 xr = a[j] - a[k];
3218 xi = a[j + 1] + a[k + 1];
3219 yr = wkr * xr + wki * xi;
3220 yi = wkr * xi - wki * xr;
3221 a[j] -= yr;
3222 a[j + 1] -= yi;
3223 a[k] += yr;
3224 a[k + 1] -= yi;
3225 wdr += ss * wki;
3226 wdi += ss * (0.5 - wkr);
3227 }
3228 if (i0 == 4) {
3229 break;
3230 }
3231 wkr = 0.5 * sin(ec * i0);
3232 wki = 0.5 * cos(ec * i0);
3233 wdr = 0.5 - (wkr * w1r - wki * w1i);
3234 wdi = wkr * w1i + wki * w1r;
3235 wkr = 0.5 - wkr;
3236 i = i0;
3237 }
3238 xr = a[2] - a[n - 2];
3239 xi = a[3] + a[n - 1];
3240 yr = wdr * xr + wdi * xi;
3241 yi = wdr * xi - wdi * xr;
3242 a[2] -= yr;
3243 a[3] -= yi;
3244 a[n - 2] += yr;
3245 a[n - 1] -= yi;
3246 }
3247
3248 __inline void dctsub(int n, float* a) {
3249 int i, i0, j, k, m;
3250 float ec, w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi;
3251
3252 ec = M_PI_2 / n;
3253 wkr = 0.5;
3254 wki = 0.5;
3255 w1r = cos(ec);
3256 w1i = sin(ec);
3257 wdr = 0.5 * (w1r - w1i);
3258 wdi = 0.5 * (w1r + w1i);
3259 ss = 2 * w1i;
3260 m = n >> 1;
3261 i = 0;
3262 for (;;) {
3263 i0 = i + 2 * DCST_LOOP_DIV;
3264 if (i0 > m - 2) {
3265 i0 = m - 2;
3266 }
3267 for (j = i + 2; j <= i0; j += 2) {
3268 k = n - j;
3269 xr = wdi * a[j - 1] - wdr * a[k + 1];
3270 xi = wdr * a[j - 1] + wdi * a[k + 1];
3271 wkr -= ss * wdi;
3272 wki += ss * wdr;
3273 yr = wki * a[j] - wkr * a[k];
3274 yi = wkr * a[j] + wki * a[k];
3275 wdr -= ss * wki;
3276 wdi += ss * wkr;
3277 a[k + 1] = xr;
3278 a[k] = yr;
3279 a[j - 1] = xi;
3280 a[j] = yi;
3281 }
3282 if (i0 == m - 2) {
3283 break;
3284 }
3285 wdr = cos(ec * i0);
3286 wdi = sin(ec * i0);
3287 wkr = 0.5 * (wdr - wdi);
3288 wki = 0.5 * (wdr + wdi);
3289 wdr = wkr * w1r - wki * w1i;
3290 wdi = wkr * w1i + wki * w1r;
3291 i = i0;
3292 }
3293 xr = wdi * a[m - 1] - wdr * a[m + 1];
3294 a[m - 1] = wdr * a[m - 1] + wdi * a[m + 1];
3295 a[m + 1] = xr;
3296 a[m] *= WR5000;
3297 }
3298
3299 __inline void dstsub(int n, float* a) {
3300 int i, i0, j, k, m;
3301 float ec, w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi;
3302
3303 ec = M_PI_2 / n;
3304 wkr = 0.5;
3305 wki = 0.5;
3306 w1r = cos(ec);
3307 w1i = sin(ec);
3308 wdr = 0.5 * (w1r - w1i);
3309 wdi = 0.5 * (w1r + w1i);
3310 ss = 2 * w1i;
3311 m = n >> 1;
3312 i = 0;
3313 for (;;) {
3314 i0 = i + 2 * DCST_LOOP_DIV;
3315 if (i0 > m - 2) {
3316 i0 = m - 2;
3317 }
3318 for (j = i + 2; j <= i0; j += 2) {
3319 k = n - j;
3320 xr = wdi * a[k + 1] - wdr * a[j - 1];
3321 xi = wdr * a[k + 1] + wdi * a[j - 1];
3322 wkr -= ss * wdi;
3323 wki += ss * wdr;
3324 yr = wki * a[k] - wkr * a[j];
3325 yi = wkr * a[k] + wki * a[j];
3326 wdr -= ss * wki;
3327 wdi += ss * wkr;
3328 a[j - 1] = xr;
3329 a[j] = yr;
3330 a[k + 1] = xi;
3331 a[k] = yi;
3332 }
3333 if (i0 == m - 2) {
3334 break;
3335 }
3336 wdr = cos(ec * i0);
3337 wdi = sin(ec * i0);
3338 wkr = 0.5 * (wdr - wdi);
3339 wki = 0.5 * (wdr + wdi);
3340 wdr = wkr * w1r - wki * w1i;
3341 wdi = wkr * w1i + wki * w1r;
3342 i = i0;
3343 }
3344 xr = wdi * a[m + 1] - wdr * a[m - 1];
3345 a[m + 1] = wdr * a[m + 1] + wdi * a[m - 1];
3346 a[m - 1] = xr;
3347 a[m] *= WR5000;
3348 }
3349
3350 __inline void dctsub4(int n, float* a) {
3351 int m;
3352 float wki, wdr, wdi, xr;
3353
3354 wki = WR5000;
3355 m = n >> 1;
3356 if (m == 2) {
3357 wdr = wki * WI2500;
3358 wdi = wki * WR2500;
3359 xr = wdi * a[1] - wdr * a[3];
3360 a[1] = wdr * a[1] + wdi * a[3];
3361 a[3] = xr;
3362 }
3363 a[m] *= wki;
3364 }
3365
3366 __inline void dstsub4(int n, float* a) {
3367 int m;
3368 float wki, wdr, wdi, xr;
3369
3370 wki = WR5000;
3371 m = n >> 1;
3372 if (m == 2) {
3373 wdr = wki * WI2500;
3374 wdi = wki * WR2500;
3375 xr = wdi * a[3] - wdr * a[1];
3376 a[3] = wdr * a[3] + wdi * a[1];
3377 a[1] = xr;
3378 }
3379 a[m] *= wki;
3380 }
3381};
3382
3383#pragma clang diagnostic pop
3384
3385#endif //__fftsg__