24 #define gone 1065353216 25 #define gsine_pi_over_eight 1053028117 27 #define gcosine_pi_over_eight 1064076127 28 #define gone_half 0.5f 29 #define gsmall_number 1.e-12f 30 #define gtiny_number 1.e-20f 31 #define gfour_gamma_squared 5.8284273147583007813f 38 #define max(x, y) (x > y ? x : y) 39 #define __fadd_rn(x, y) (x + y) 40 #define __fsub_rn(x, y) (x - y) 41 #define __frsqrt_rn(x) (1.0 / sqrt(x)) 43 inline void svd(
float a11,
74 un Sa11, Sa21, Sa31, Sa12, Sa22, Sa32, Sa13, Sa23, Sa33;
75 un Su11, Su21, Su31, Su12, Su22, Su32, Su13, Su23, Su33;
76 un Sv11, Sv21, Sv31, Sv12, Sv22, Sv32, Sv13, Sv23, Sv33;
78 un Stmp1, Stmp2, Stmp3, Stmp4, Stmp5;
79 un Ss11, Ss21, Ss31, Ss22, Ss32, Ss33;
80 un Sqvs, Sqvvx, Sqvvy, Sqvvz;
96 Ss11.
f = Sa11.
f * Sa11.
f;
97 Stmp1.
f = Sa21.
f * Sa21.
f;
99 Stmp1.
f = Sa31.
f * Sa31.
f;
102 Ss21.
f = Sa12.
f * Sa11.
f;
103 Stmp1.
f = Sa22.
f * Sa21.
f;
105 Stmp1.
f = Sa32.
f * Sa31.
f;
108 Ss31.
f = Sa13.
f * Sa11.
f;
109 Stmp1.
f = Sa23.
f * Sa21.
f;
111 Stmp1.
f = Sa33.
f * Sa31.
f;
114 Ss22.
f = Sa12.
f * Sa12.
f;
115 Stmp1.
f = Sa22.
f * Sa22.
f;
117 Stmp1.
f = Sa32.
f * Sa32.
f;
120 Ss32.
f = Sa13.
f * Sa12.
f;
121 Stmp1.
f = Sa23.
f * Sa22.
f;
123 Stmp1.
f = Sa33.
f * Sa32.
f;
126 Ss33.
f = Sa13.
f * Sa13.
f;
127 Stmp1.
f = Sa23.
f * Sa23.
f;
129 Stmp1.
f = Sa33.
f * Sa33.
f;
140 for (
int i = 0; i < 4; i++) {
141 Ssh.
f = Ss21.
f * 0.5f;
144 Stmp2.
f = Ssh.
f * Ssh.
f;
146 Ssh.
ui = Stmp1.
ui & Ssh.
ui;
147 Sch.
ui = Stmp1.
ui & Stmp5.
ui;
149 Sch.
ui = Sch.
ui | Stmp2.
ui;
151 Stmp1.
f = Ssh.
f * Ssh.
f;
152 Stmp2.
f = Sch.
f * Sch.
f;
156 Ssh.
f = Stmp4.
f * Ssh.
f;
157 Sch.
f = Stmp4.
f * Sch.
f;
159 Stmp1.
ui = (Stmp2.
f <= Stmp1.
f) ? 0xffffffff : 0;
162 Ssh.
ui = ~Stmp1.
ui & Ssh.
ui;
163 Ssh.
ui = Ssh.
ui | Stmp2.
ui;
165 Sch.
ui = ~Stmp1.
ui & Sch.
ui;
166 Sch.
ui = Sch.
ui | Stmp2.
ui;
168 Stmp1.
f = Ssh.
f * Ssh.
f;
169 Stmp2.
f = Sch.
f * Sch.
f;
171 Ss.
f = Sch.
f * Ssh.
f;
174 #ifdef DEBUG_JACOBI_CONJUGATE 175 printf(
"GPU s %.20g, c %.20g, sh %.20g, ch %.20g\n", Ss.
f, Sc.
f, Ssh.
f,
183 Ss33.
f = Ss33.
f * Stmp3.
f;
184 Ss31.
f = Ss31.
f * Stmp3.
f;
185 Ss32.
f = Ss32.
f * Stmp3.
f;
186 Ss33.
f = Ss33.
f * Stmp3.
f;
188 Stmp1.
f = Ss.
f * Ss31.
f;
189 Stmp2.
f = Ss.
f * Ss32.
f;
190 Ss31.
f = Sc.
f * Ss31.
f;
191 Ss32.
f = Sc.
f * Ss32.
f;
195 Stmp2.
f = Ss.
f * Ss.
f;
196 Stmp1.
f = Ss22.
f * Stmp2.
f;
197 Stmp3.
f = Ss11.
f * Stmp2.
f;
198 Stmp4.
f = Sc.
f * Sc.
f;
199 Ss11.
f = Ss11.
f * Stmp4.
f;
200 Ss22.
f = Ss22.
f * Stmp4.
f;
205 Ss21.
f = Ss21.
f * Stmp4.
f;
206 Stmp4.
f = Sc.
f * Ss.
f;
207 Stmp2.
f = Stmp2.
f * Stmp4.
f;
208 Stmp5.
f = Stmp5.
f * Stmp4.
f;
213 #ifdef DEBUG_JACOBI_CONJUGATE 214 printf(
"%.20g\n", Ss11.
f);
215 printf(
"%.20g %.20g\n", Ss21.
f, Ss22.
f);
216 printf(
"%.20g %.20g %.20g\n", Ss31.
f, Ss32.
f, Ss33.
f);
223 Stmp1.
f = Ssh.
f * Sqvvx.
f;
224 Stmp2.
f = Ssh.
f * Sqvvy.
f;
225 Stmp3.
f = Ssh.
f * Sqvvz.
f;
226 Ssh.
f = Ssh.
f * Sqvs.
f;
228 Sqvs.
f = Sch.
f * Sqvs.
f;
229 Sqvvx.
f = Sch.
f * Sqvvx.
f;
230 Sqvvy.
f = Sch.
f * Sqvvy.
f;
231 Sqvvz.
f = Sch.
f * Sqvvz.
f;
238 #ifdef DEBUG_JACOBI_CONJUGATE 239 printf(
"GPU q %.20g %.20g %.20g %.20g\n", Sqvvx.
f, Sqvvy.
f, Sqvvz.
f,
246 Ssh.
f = Ss32.
f * 0.5f;
249 Stmp2.
f = Ssh.
f * Ssh.
f;
251 Ssh.
ui = Stmp1.
ui & Ssh.
ui;
252 Sch.
ui = Stmp1.
ui & Stmp5.
ui;
254 Sch.
ui = Sch.
ui | Stmp2.
ui;
256 Stmp1.
f = Ssh.
f * Ssh.
f;
257 Stmp2.
f = Sch.
f * Sch.
f;
261 Ssh.
f = Stmp4.
f * Ssh.
f;
262 Sch.
f = Stmp4.
f * Sch.
f;
264 Stmp1.
ui = (Stmp2.
f <= Stmp1.
f) ? 0xffffffff : 0;
267 Ssh.
ui = ~Stmp1.
ui & Ssh.
ui;
268 Ssh.
ui = Ssh.
ui | Stmp2.
ui;
270 Sch.
ui = ~Stmp1.
ui & Sch.
ui;
271 Sch.
ui = Sch.
ui | Stmp2.
ui;
273 Stmp1.
f = Ssh.
f * Ssh.
f;
274 Stmp2.
f = Sch.
f * Sch.
f;
276 Ss.
f = Sch.
f * Ssh.
f;
279 #ifdef DEBUG_JACOBI_CONJUGATE 280 printf(
"GPU s %.20g, c %.20g, sh %.20g, ch %.20g\n", Ss.
f, Sc.
f, Ssh.
f,
289 Ss11.
f = Ss11.
f * Stmp3.
f;
290 Ss21.
f = Ss21.
f * Stmp3.
f;
291 Ss31.
f = Ss31.
f * Stmp3.
f;
292 Ss11.
f = Ss11.
f * Stmp3.
f;
294 Stmp1.
f = Ss.
f * Ss21.
f;
295 Stmp2.
f = Ss.
f * Ss31.
f;
296 Ss21.
f = Sc.
f * Ss21.
f;
297 Ss31.
f = Sc.
f * Ss31.
f;
301 Stmp2.
f = Ss.
f * Ss.
f;
302 Stmp1.
f = Ss33.
f * Stmp2.
f;
303 Stmp3.
f = Ss22.
f * Stmp2.
f;
304 Stmp4.
f = Sc.
f * Sc.
f;
305 Ss22.
f = Ss22.
f * Stmp4.
f;
306 Ss33.
f = Ss33.
f * Stmp4.
f;
311 Ss32.
f = Ss32.
f * Stmp4.
f;
312 Stmp4.
f = Sc.
f * Ss.
f;
313 Stmp2.
f = Stmp2.
f * Stmp4.
f;
314 Stmp5.
f = Stmp5.
f * Stmp4.
f;
319 #ifdef DEBUG_JACOBI_CONJUGATE 320 printf(
"%.20g\n", Ss11.
f);
321 printf(
"%.20g %.20g\n", Ss21.
f, Ss22.
f);
322 printf(
"%.20g %.20g %.20g\n", Ss31.
f, Ss32.
f, Ss33.
f);
329 Stmp1.
f = Ssh.
f * Sqvvx.
f;
330 Stmp2.
f = Ssh.
f * Sqvvy.
f;
331 Stmp3.
f = Ssh.
f * Sqvvz.
f;
332 Ssh.
f = Ssh.
f * Sqvs.
f;
334 Sqvs.
f = Sch.
f * Sqvs.
f;
335 Sqvvx.
f = Sch.
f * Sqvvx.
f;
336 Sqvvy.
f = Sch.
f * Sqvvy.
f;
337 Sqvvz.
f = Sch.
f * Sqvvz.
f;
344 #ifdef DEBUG_JACOBI_CONJUGATE 345 printf(
"GPU q %.20g %.20g %.20g %.20g\n", Sqvvx.
f, Sqvvy.
f, Sqvvz.
f,
353 Ssh.
f = Ss31.
f * 0.5f;
356 Stmp2.
f = Ssh.
f * Ssh.
f;
358 Ssh.
ui = Stmp1.
ui & Ssh.
ui;
359 Sch.
ui = Stmp1.
ui & Stmp5.
ui;
361 Sch.
ui = Sch.
ui | Stmp2.
ui;
363 Stmp1.
f = Ssh.
f * Ssh.
f;
364 Stmp2.
f = Sch.
f * Sch.
f;
368 Ssh.
f = Stmp4.
f * Ssh.
f;
369 Sch.
f = Stmp4.
f * Sch.
f;
371 Stmp1.
ui = (Stmp2.
f <= Stmp1.
f) ? 0xffffffff : 0;
374 Ssh.
ui = ~Stmp1.
ui & Ssh.
ui;
375 Ssh.
ui = Ssh.
ui | Stmp2.
ui;
377 Sch.
ui = ~Stmp1.
ui & Sch.
ui;
378 Sch.
ui = Sch.
ui | Stmp2.
ui;
380 Stmp1.
f = Ssh.
f * Ssh.
f;
381 Stmp2.
f = Sch.
f * Sch.
f;
383 Ss.
f = Sch.
f * Ssh.
f;
386 #ifdef DEBUG_JACOBI_CONJUGATE 387 printf(
"GPU s %.20g, c %.20g, sh %.20g, ch %.20g\n", Ss.
f, Sc.
f, Ssh.
f,
396 Ss22.
f = Ss22.
f * Stmp3.
f;
397 Ss32.
f = Ss32.
f * Stmp3.
f;
398 Ss21.
f = Ss21.
f * Stmp3.
f;
399 Ss22.
f = Ss22.
f * Stmp3.
f;
401 Stmp1.
f = Ss.
f * Ss32.
f;
402 Stmp2.
f = Ss.
f * Ss21.
f;
403 Ss32.
f = Sc.
f * Ss32.
f;
404 Ss21.
f = Sc.
f * Ss21.
f;
408 Stmp2.
f = Ss.
f * Ss.
f;
409 Stmp1.
f = Ss11.
f * Stmp2.
f;
410 Stmp3.
f = Ss33.
f * Stmp2.
f;
411 Stmp4.
f = Sc.
f * Sc.
f;
412 Ss33.
f = Ss33.
f * Stmp4.
f;
413 Ss11.
f = Ss11.
f * Stmp4.
f;
418 Ss31.
f = Ss31.
f * Stmp4.
f;
419 Stmp4.
f = Sc.
f * Ss.
f;
420 Stmp2.
f = Stmp2.
f * Stmp4.
f;
421 Stmp5.
f = Stmp5.
f * Stmp4.
f;
426 #ifdef DEBUG_JACOBI_CONJUGATE 427 printf(
"%.20g\n", Ss11.
f);
428 printf(
"%.20g %.20g\n", Ss21.
f, Ss22.
f);
429 printf(
"%.20g %.20g %.20g\n", Ss31.
f, Ss32.
f, Ss33.
f);
436 Stmp1.
f = Ssh.
f * Sqvvx.
f;
437 Stmp2.
f = Ssh.
f * Sqvvy.
f;
438 Stmp3.
f = Ssh.
f * Sqvvz.
f;
439 Ssh.
f = Ssh.
f * Sqvs.
f;
441 Sqvs.
f = Sch.
f * Sqvs.
f;
442 Sqvvx.
f = Sch.
f * Sqvvx.
f;
443 Sqvvy.
f = Sch.
f * Sqvvy.
f;
444 Sqvvz.
f = Sch.
f * Sqvvz.
f;
457 Stmp2.
f = Sqvs.
f * Sqvs.
f;
458 Stmp1.
f = Sqvvx.
f * Sqvvx.
f;
460 Stmp1.
f = Sqvvy.
f * Sqvvy.
f;
462 Stmp1.
f = Sqvvz.
f * Sqvvz.
f;
466 Stmp4.
f = Stmp1.
f * 0.5f;
467 Stmp3.
f = Stmp1.
f * Stmp4.
f;
468 Stmp3.
f = Stmp1.
f * Stmp3.
f;
469 Stmp3.
f = Stmp2.
f * Stmp3.
f;
473 Sqvs.
f = Sqvs.
f * Stmp1.
f;
474 Sqvvx.
f = Sqvvx.
f * Stmp1.
f;
475 Sqvvy.
f = Sqvvy.
f * Stmp1.
f;
476 Sqvvz.
f = Sqvvz.
f * Stmp1.
f;
482 Stmp1.
f = Sqvvx.
f * Sqvvx.
f;
483 Stmp2.
f = Sqvvy.
f * Sqvvy.
f;
484 Stmp3.
f = Sqvvz.
f * Sqvvz.
f;
485 Sv11.
f = Sqvs.
f * Sqvs.
f;
497 Sv32.
f = Sqvs.
f * Stmp1.
f;
498 Sv13.
f = Sqvs.
f * Stmp2.
f;
499 Sv21.
f = Sqvs.
f * Stmp3.
f;
500 Stmp1.
f = Sqvvy.
f * Stmp1.
f;
501 Stmp2.
f = Sqvvz.
f * Stmp2.
f;
502 Stmp3.
f = Sqvvx.
f * Stmp3.
f;
516 Sa12.
f = Sv12.
f * Sa11.
f;
517 Sa13.
f = Sv13.
f * Sa11.
f;
518 Sa11.
f = Sv11.
f * Sa11.
f;
519 Stmp1.
f = Sv21.
f * Stmp2.
f;
521 Stmp1.
f = Sv31.
f * Stmp3.
f;
523 Stmp1.
f = Sv22.
f * Stmp2.
f;
525 Stmp1.
f = Sv32.
f * Stmp3.
f;
527 Stmp1.
f = Sv23.
f * Stmp2.
f;
529 Stmp1.
f = Sv33.
f * Stmp3.
f;
534 Sa22.
f = Sv12.
f * Sa21.
f;
535 Sa23.
f = Sv13.
f * Sa21.
f;
536 Sa21.
f = Sv11.
f * Sa21.
f;
537 Stmp1.
f = Sv21.
f * Stmp2.
f;
539 Stmp1.
f = Sv31.
f * Stmp3.
f;
541 Stmp1.
f = Sv22.
f * Stmp2.
f;
543 Stmp1.
f = Sv32.
f * Stmp3.
f;
545 Stmp1.
f = Sv23.
f * Stmp2.
f;
547 Stmp1.
f = Sv33.
f * Stmp3.
f;
552 Sa32.
f = Sv12.
f * Sa31.
f;
553 Sa33.
f = Sv13.
f * Sa31.
f;
554 Sa31.
f = Sv11.
f * Sa31.
f;
555 Stmp1.
f = Sv21.
f * Stmp2.
f;
557 Stmp1.
f = Sv31.
f * Stmp3.
f;
559 Stmp1.
f = Sv22.
f * Stmp2.
f;
561 Stmp1.
f = Sv32.
f * Stmp3.
f;
563 Stmp1.
f = Sv23.
f * Stmp2.
f;
565 Stmp1.
f = Sv33.
f * Stmp3.
f;
572 Stmp1.
f = Sa11.
f * Sa11.
f;
573 Stmp4.
f = Sa21.
f * Sa21.
f;
575 Stmp4.
f = Sa31.
f * Sa31.
f;
578 Stmp2.
f = Sa12.
f * Sa12.
f;
579 Stmp4.
f = Sa22.
f * Sa22.
f;
581 Stmp4.
f = Sa32.
f * Sa32.
f;
584 Stmp3.
f = Sa13.
f * Sa13.
f;
585 Stmp4.
f = Sa23.
f * Sa23.
f;
587 Stmp4.
f = Sa33.
f * Sa33.
f;
592 Stmp4.
ui = (Stmp1.
f < Stmp2.
f) ? 0xffffffff : 0;
593 Stmp5.
ui = Sa11.
ui ^ Sa12.
ui;
594 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
595 Sa11.
ui = Sa11.
ui ^ Stmp5.
ui;
596 Sa12.
ui = Sa12.
ui ^ Stmp5.
ui;
598 Stmp5.
ui = Sa21.
ui ^ Sa22.
ui;
599 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
600 Sa21.
ui = Sa21.
ui ^ Stmp5.
ui;
601 Sa22.
ui = Sa22.
ui ^ Stmp5.
ui;
603 Stmp5.
ui = Sa31.
ui ^ Sa32.
ui;
604 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
605 Sa31.
ui = Sa31.
ui ^ Stmp5.
ui;
606 Sa32.
ui = Sa32.
ui ^ Stmp5.
ui;
608 Stmp5.
ui = Sv11.
ui ^ Sv12.
ui;
609 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
610 Sv11.
ui = Sv11.
ui ^ Stmp5.
ui;
611 Sv12.
ui = Sv12.
ui ^ Stmp5.
ui;
613 Stmp5.
ui = Sv21.
ui ^ Sv22.
ui;
614 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
615 Sv21.
ui = Sv21.
ui ^ Stmp5.
ui;
616 Sv22.
ui = Sv22.
ui ^ Stmp5.
ui;
618 Stmp5.
ui = Sv31.
ui ^ Sv32.
ui;
619 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
620 Sv31.
ui = Sv31.
ui ^ Stmp5.
ui;
621 Sv32.
ui = Sv32.
ui ^ Stmp5.
ui;
623 Stmp5.
ui = Stmp1.
ui ^ Stmp2.
ui;
624 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
625 Stmp1.
ui = Stmp1.
ui ^ Stmp5.
ui;
626 Stmp2.
ui = Stmp2.
ui ^ Stmp5.
ui;
632 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
636 Sa12.
f = Sa12.
f * Stmp4.
f;
637 Sa22.
f = Sa22.
f * Stmp4.
f;
638 Sa32.
f = Sa32.
f * Stmp4.
f;
640 Sv12.
f = Sv12.
f * Stmp4.
f;
641 Sv22.
f = Sv22.
f * Stmp4.
f;
642 Sv32.
f = Sv32.
f * Stmp4.
f;
646 Stmp4.
ui = (Stmp1.
f < Stmp3.
f) ? 0xffffffff : 0;
647 Stmp5.
ui = Sa11.
ui ^ Sa13.
ui;
648 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
649 Sa11.
ui = Sa11.
ui ^ Stmp5.
ui;
650 Sa13.
ui = Sa13.
ui ^ Stmp5.
ui;
652 Stmp5.
ui = Sa21.
ui ^ Sa23.
ui;
653 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
654 Sa21.
ui = Sa21.
ui ^ Stmp5.
ui;
655 Sa23.
ui = Sa23.
ui ^ Stmp5.
ui;
657 Stmp5.
ui = Sa31.
ui ^ Sa33.
ui;
658 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
659 Sa31.
ui = Sa31.
ui ^ Stmp5.
ui;
660 Sa33.
ui = Sa33.
ui ^ Stmp5.
ui;
662 Stmp5.
ui = Sv11.
ui ^ Sv13.
ui;
663 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
664 Sv11.
ui = Sv11.
ui ^ Stmp5.
ui;
665 Sv13.
ui = Sv13.
ui ^ Stmp5.
ui;
667 Stmp5.
ui = Sv21.
ui ^ Sv23.
ui;
668 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
669 Sv21.
ui = Sv21.
ui ^ Stmp5.
ui;
670 Sv23.
ui = Sv23.
ui ^ Stmp5.
ui;
672 Stmp5.
ui = Sv31.
ui ^ Sv33.
ui;
673 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
674 Sv31.
ui = Sv31.
ui ^ Stmp5.
ui;
675 Sv33.
ui = Sv33.
ui ^ Stmp5.
ui;
677 Stmp5.
ui = Stmp1.
ui ^ Stmp3.
ui;
678 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
679 Stmp1.
ui = Stmp1.
ui ^ Stmp5.
ui;
680 Stmp3.
ui = Stmp3.
ui ^ Stmp5.
ui;
686 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
690 Sa11.
f = Sa11.
f * Stmp4.
f;
691 Sa21.
f = Sa21.
f * Stmp4.
f;
692 Sa31.
f = Sa31.
f * Stmp4.
f;
694 Sv11.
f = Sv11.
f * Stmp4.
f;
695 Sv21.
f = Sv21.
f * Stmp4.
f;
696 Sv31.
f = Sv31.
f * Stmp4.
f;
700 Stmp4.
ui = (Stmp2.
f < Stmp3.
f) ? 0xffffffff : 0;
701 Stmp5.
ui = Sa12.
ui ^ Sa13.
ui;
702 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
703 Sa12.
ui = Sa12.
ui ^ Stmp5.
ui;
704 Sa13.
ui = Sa13.
ui ^ Stmp5.
ui;
706 Stmp5.
ui = Sa22.
ui ^ Sa23.
ui;
707 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
708 Sa22.
ui = Sa22.
ui ^ Stmp5.
ui;
709 Sa23.
ui = Sa23.
ui ^ Stmp5.
ui;
711 Stmp5.
ui = Sa32.
ui ^ Sa33.
ui;
712 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
713 Sa32.
ui = Sa32.
ui ^ Stmp5.
ui;
714 Sa33.
ui = Sa33.
ui ^ Stmp5.
ui;
716 Stmp5.
ui = Sv12.
ui ^ Sv13.
ui;
717 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
718 Sv12.
ui = Sv12.
ui ^ Stmp5.
ui;
719 Sv13.
ui = Sv13.
ui ^ Stmp5.
ui;
721 Stmp5.
ui = Sv22.
ui ^ Sv23.
ui;
722 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
723 Sv22.
ui = Sv22.
ui ^ Stmp5.
ui;
724 Sv23.
ui = Sv23.
ui ^ Stmp5.
ui;
726 Stmp5.
ui = Sv32.
ui ^ Sv33.
ui;
727 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
728 Sv32.
ui = Sv32.
ui ^ Stmp5.
ui;
729 Sv33.
ui = Sv33.
ui ^ Stmp5.
ui;
731 Stmp5.
ui = Stmp2.
ui ^ Stmp3.
ui;
732 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
733 Stmp2.
ui = Stmp2.
ui ^ Stmp5.
ui;
734 Stmp3.
ui = Stmp3.
ui ^ Stmp5.
ui;
740 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
744 Sa13.
f = Sa13.
f * Stmp4.
f;
745 Sa23.
f = Sa23.
f * Stmp4.
f;
746 Sa33.
f = Sa33.
f * Stmp4.
f;
748 Sv13.
f = Sv13.
f * Stmp4.
f;
749 Sv23.
f = Sv23.
f * Stmp4.
f;
750 Sv33.
f = Sv33.
f * Stmp4.
f;
766 Ssh.
f = Sa21.
f * Sa21.
f;
768 Ssh.
ui = Ssh.
ui & Sa21.
ui;
772 Sch.
f =
max(Sch.
f, Sa11.
f);
774 Stmp5.
ui = (Sa11.
f >= Stmp5.
f) ? 0xffffffff : 0;
776 Stmp1.
f = Sch.
f * Sch.
f;
777 Stmp2.
f = Ssh.
f * Ssh.
f;
781 Stmp4.
f = Stmp1.
f * 0.5f;
782 Stmp3.
f = Stmp1.
f * Stmp4.
f;
783 Stmp3.
f = Stmp1.
f * Stmp3.
f;
784 Stmp3.
f = Stmp2.
f * Stmp3.
f;
787 Stmp1.
f = Stmp1.
f * Stmp2.
f;
791 Stmp1.
ui = ~Stmp5.
ui & Ssh.
ui;
792 Stmp2.
ui = ~Stmp5.
ui & Sch.
ui;
793 Sch.
ui = Stmp5.
ui & Sch.
ui;
794 Ssh.
ui = Stmp5.
ui & Ssh.
ui;
795 Sch.
ui = Sch.
ui | Stmp1.
ui;
796 Ssh.
ui = Ssh.
ui | Stmp2.
ui;
798 Stmp1.
f = Sch.
f * Sch.
f;
799 Stmp2.
f = Ssh.
f * Ssh.
f;
803 Stmp4.
f = Stmp1.
f * 0.5f;
804 Stmp3.
f = Stmp1.
f * Stmp4.
f;
805 Stmp3.
f = Stmp1.
f * Stmp3.
f;
806 Stmp3.
f = Stmp2.
f * Stmp3.
f;
810 Sch.
f = Sch.
f * Stmp1.
f;
811 Ssh.
f = Ssh.
f * Stmp1.
f;
813 Sc.
f = Sch.
f * Sch.
f;
814 Ss.
f = Ssh.
f * Ssh.
f;
816 Ss.
f = Ssh.
f * Sch.
f;
823 Stmp1.
f = Ss.
f * Sa11.
f;
824 Stmp2.
f = Ss.
f * Sa21.
f;
825 Sa11.
f = Sc.
f * Sa11.
f;
826 Sa21.
f = Sc.
f * Sa21.
f;
830 Stmp1.
f = Ss.
f * Sa12.
f;
831 Stmp2.
f = Ss.
f * Sa22.
f;
832 Sa12.
f = Sc.
f * Sa12.
f;
833 Sa22.
f = Sc.
f * Sa22.
f;
837 Stmp1.
f = Ss.
f * Sa13.
f;
838 Stmp2.
f = Ss.
f * Sa23.
f;
839 Sa13.
f = Sc.
f * Sa13.
f;
840 Sa23.
f = Sc.
f * Sa23.
f;
848 Stmp1.
f = Ss.
f * Su11.
f;
849 Stmp2.
f = Ss.
f * Su12.
f;
850 Su11.
f = Sc.
f * Su11.
f;
851 Su12.
f = Sc.
f * Su12.
f;
855 Stmp1.
f = Ss.
f * Su21.
f;
856 Stmp2.
f = Ss.
f * Su22.
f;
857 Su21.
f = Sc.
f * Su21.
f;
858 Su22.
f = Sc.
f * Su22.
f;
862 Stmp1.
f = Ss.
f * Su31.
f;
863 Stmp2.
f = Ss.
f * Su32.
f;
864 Su31.
f = Sc.
f * Su31.
f;
865 Su32.
f = Sc.
f * Su32.
f;
871 Ssh.
f = Sa31.
f * Sa31.
f;
873 Ssh.
ui = Ssh.
ui & Sa31.
ui;
877 Sch.
f =
max(Sch.
f, Sa11.
f);
879 Stmp5.
ui = (Sa11.
f >= Stmp5.
f) ? 0xffffffff : 0;
881 Stmp1.
f = Sch.
f * Sch.
f;
882 Stmp2.
f = Ssh.
f * Ssh.
f;
886 Stmp4.
f = Stmp1.
f * 0.5;
887 Stmp3.
f = Stmp1.
f * Stmp4.
f;
888 Stmp3.
f = Stmp1.
f * Stmp3.
f;
889 Stmp3.
f = Stmp2.
f * Stmp3.
f;
892 Stmp1.
f = Stmp1.
f * Stmp2.
f;
896 Stmp1.
ui = ~Stmp5.
ui & Ssh.
ui;
897 Stmp2.
ui = ~Stmp5.
ui & Sch.
ui;
898 Sch.
ui = Stmp5.
ui & Sch.
ui;
899 Ssh.
ui = Stmp5.
ui & Ssh.
ui;
900 Sch.
ui = Sch.
ui | Stmp1.
ui;
901 Ssh.
ui = Ssh.
ui | Stmp2.
ui;
903 Stmp1.
f = Sch.
f * Sch.
f;
904 Stmp2.
f = Ssh.
f * Ssh.
f;
908 Stmp4.
f = Stmp1.
f * 0.5f;
909 Stmp3.
f = Stmp1.
f * Stmp4.
f;
910 Stmp3.
f = Stmp1.
f * Stmp3.
f;
911 Stmp3.
f = Stmp2.
f * Stmp3.
f;
915 Sch.
f = Sch.
f * Stmp1.
f;
916 Ssh.
f = Ssh.
f * Stmp1.
f;
918 Sc.
f = Sch.
f * Sch.
f;
919 Ss.
f = Ssh.
f * Ssh.
f;
921 Ss.
f = Ssh.
f * Sch.
f;
928 Stmp1.
f = Ss.
f * Sa11.
f;
929 Stmp2.
f = Ss.
f * Sa31.
f;
930 Sa11.
f = Sc.
f * Sa11.
f;
931 Sa31.
f = Sc.
f * Sa31.
f;
935 Stmp1.
f = Ss.
f * Sa12.
f;
936 Stmp2.
f = Ss.
f * Sa32.
f;
937 Sa12.
f = Sc.
f * Sa12.
f;
938 Sa32.
f = Sc.
f * Sa32.
f;
942 Stmp1.
f = Ss.
f * Sa13.
f;
943 Stmp2.
f = Ss.
f * Sa33.
f;
944 Sa13.
f = Sc.
f * Sa13.
f;
945 Sa33.
f = Sc.
f * Sa33.
f;
953 Stmp1.
f = Ss.
f * Su11.
f;
954 Stmp2.
f = Ss.
f * Su13.
f;
955 Su11.
f = Sc.
f * Su11.
f;
956 Su13.
f = Sc.
f * Su13.
f;
960 Stmp1.
f = Ss.
f * Su21.
f;
961 Stmp2.
f = Ss.
f * Su23.
f;
962 Su21.
f = Sc.
f * Su21.
f;
963 Su23.
f = Sc.
f * Su23.
f;
967 Stmp1.
f = Ss.
f * Su31.
f;
968 Stmp2.
f = Ss.
f * Su33.
f;
969 Su31.
f = Sc.
f * Su31.
f;
970 Su33.
f = Sc.
f * Su33.
f;
976 Ssh.
f = Sa32.
f * Sa32.
f;
978 Ssh.
ui = Ssh.
ui & Sa32.
ui;
982 Sch.
f =
max(Sch.
f, Sa22.
f);
984 Stmp5.
ui = (Sa22.
f >= Stmp5.
f) ? 0xffffffff : 0;
986 Stmp1.
f = Sch.
f * Sch.
f;
987 Stmp2.
f = Ssh.
f * Ssh.
f;
991 Stmp4.
f = Stmp1.
f * 0.5f;
992 Stmp3.
f = Stmp1.
f * Stmp4.
f;
993 Stmp3.
f = Stmp1.
f * Stmp3.
f;
994 Stmp3.
f = Stmp2.
f * Stmp3.
f;
997 Stmp1.
f = Stmp1.
f * Stmp2.
f;
1001 Stmp1.
ui = ~Stmp5.
ui & Ssh.
ui;
1002 Stmp2.
ui = ~Stmp5.
ui & Sch.
ui;
1003 Sch.
ui = Stmp5.
ui & Sch.
ui;
1004 Ssh.
ui = Stmp5.
ui & Ssh.
ui;
1005 Sch.
ui = Sch.
ui | Stmp1.
ui;
1006 Ssh.
ui = Ssh.
ui | Stmp2.
ui;
1008 Stmp1.
f = Sch.
f * Sch.
f;
1009 Stmp2.
f = Ssh.
f * Ssh.
f;
1013 Stmp4.
f = Stmp1.
f * 0.5f;
1014 Stmp3.
f = Stmp1.
f * Stmp4.
f;
1015 Stmp3.
f = Stmp1.
f * Stmp3.
f;
1016 Stmp3.
f = Stmp2.
f * Stmp3.
f;
1020 Sch.
f = Sch.
f * Stmp1.
f;
1021 Ssh.
f = Ssh.
f * Stmp1.
f;
1023 Sc.
f = Sch.
f * Sch.
f;
1024 Ss.
f = Ssh.
f * Ssh.
f;
1026 Ss.
f = Ssh.
f * Sch.
f;
1033 Stmp1.
f = Ss.
f * Sa21.
f;
1034 Stmp2.
f = Ss.
f * Sa31.
f;
1035 Sa21.
f = Sc.
f * Sa21.
f;
1036 Sa31.
f = Sc.
f * Sa31.
f;
1040 Stmp1.
f = Ss.
f * Sa22.
f;
1041 Stmp2.
f = Ss.
f * Sa32.
f;
1042 Sa22.
f = Sc.
f * Sa22.
f;
1043 Sa32.
f = Sc.
f * Sa32.
f;
1047 Stmp1.
f = Ss.
f * Sa23.
f;
1048 Stmp2.
f = Ss.
f * Sa33.
f;
1049 Sa23.
f = Sc.
f * Sa23.
f;
1050 Sa33.
f = Sc.
f * Sa33.
f;
1058 Stmp1.
f = Ss.
f * Su12.
f;
1059 Stmp2.
f = Ss.
f * Su13.
f;
1060 Su12.
f = Sc.
f * Su12.
f;
1061 Su13.
f = Sc.
f * Su13.
f;
1065 Stmp1.
f = Ss.
f * Su22.
f;
1066 Stmp2.
f = Ss.
f * Su23.
f;
1067 Su22.
f = Sc.
f * Su22.
f;
1068 Su23.
f = Sc.
f * Su23.
f;
1072 Stmp1.
f = Ss.
f * Su32.
f;
1073 Stmp2.
f = Ss.
f * Su33.
f;
1074 Su32.
f = Sc.
f * Su32.
f;
1075 Su33.
f = Sc.
f * Su33.
f;
#define gfour_gamma_squared
Definition: SVD3x3CPU.h:31
Definition: SVD3x3CPU.h:33
#define __fadd_rn(x, y)
Definition: SVD3x3CPU.h:39
void svd(float a11, float a12, float a13, float a21, float a22, float a23, float a31, float a32, float a33, float &u11, float &u12, float &u13, float &u21, float &u22, float &u23, float &u31, float &u32, float &u33, float &s11, float &s22, float &s33, float &v11, float &v12, float &v13, float &v21, float &v22, float &v23, float &v31, float &v32, float &v33)
Definition: SVD3x3CPU.h:43
#define gcosine_pi_over_eight
Definition: SVD3x3CPU.h:27
#define gsine_pi_over_eight
Definition: SVD3x3CPU.h:25
#define __fsub_rn(x, y)
Definition: SVD3x3CPU.h:40
#define gone
Definition: SVD3x3CPU.h:24
#define gsmall_number
Definition: SVD3x3CPU.h:29
float f
Definition: SVD3x3CPU.h:34
#define gtiny_number
Definition: SVD3x3CPU.h:30
#define __frsqrt_rn(x)
Definition: SVD3x3CPU.h:41
unsigned int ui
Definition: SVD3x3CPU.h:35
#define max(x, y)
Definition: SVD3x3CPU.h:38