51 #if defined(BUILD_CUDA_MODULE) && defined(__CUDACC__) 58 #define gone 1065353216 59 #define gsine_pi_over_eight 1053028117 61 #define gcosine_pi_over_eight 1064076127 62 #define gtiny_number 1.e-20 63 #define gfour_gamma_squared 5.8284273147583007813 71 #if defined(BUILD_CUDA_MODULE) && defined(__CUDACC__) 72 #define __fadd_rn(x, y) __fadd_rn(x, y) 73 #define __fsub_rn(x, y) __fsub_rn(x, y) 74 #define __frsqrt_rn(x) __frsqrt_rn(x) 76 #define __dadd_rn(x, y) __dadd_rn(x, y) 77 #define __dsub_rn(x, y) __dsub_rn(x, y) 78 #define __drsqrt_rn(x) __drcp_rn(__dsqrt_rn(x)) 81 #define __fadd_rn(x, y) (x + y) 82 #define __fsub_rn(x, y) (x - y) 83 #define __frsqrt_rn(x) (1.0 / sqrt(x)) 85 #define __dadd_rn(x, y) (x + y) 86 #define __dsub_rn(x, y) (x - y) 87 #define __drsqrt_rn(x) (1.0 / sqrt(x)) 89 #define __add_rn(x, y) (x + y) 90 #define __sub_rn(x, y) (x - y) 91 #define __rsqrt_rn(x) (1.0 / sqrt(x)) 99 template <
typename scalar_t>
105 template <
typename scalar_t>
116 double gsmall_number = 1.e-12;
118 un<double> Sa11, Sa21, Sa31, Sa12, Sa22, Sa32, Sa13, Sa23, Sa33;
119 un<double> Su11, Su21, Su31, Su12, Su22, Su32, Su13, Su23, Su33;
120 un<double> Sv11, Sv21, Sv31, Sv12, Sv22, Sv32, Sv13, Sv23, Sv33;
123 un<double> Ss11, Ss21, Ss31, Ss22, Ss32, Ss33;
140 Ss11.
f = Sa11.
f * Sa11.
f;
141 Stmp1.
f = Sa21.
f * Sa21.
f;
143 Stmp1.
f = Sa31.
f * Sa31.
f;
146 Ss21.
f = Sa12.
f * Sa11.
f;
147 Stmp1.
f = Sa22.
f * Sa21.
f;
149 Stmp1.
f = Sa32.
f * Sa31.
f;
152 Ss31.
f = Sa13.
f * Sa11.
f;
153 Stmp1.
f = Sa23.
f * Sa21.
f;
155 Stmp1.
f = Sa33.
f * Sa31.
f;
158 Ss22.
f = Sa12.
f * Sa12.
f;
159 Stmp1.
f = Sa22.
f * Sa22.
f;
161 Stmp1.
f = Sa32.
f * Sa32.
f;
164 Ss32.
f = Sa13.
f * Sa12.
f;
165 Stmp1.
f = Sa23.
f * Sa22.
f;
167 Stmp1.
f = Sa33.
f * Sa32.
f;
170 Ss33.
f = Sa13.
f * Sa13.
f;
171 Stmp1.
f = Sa23.
f * Sa23.
f;
173 Stmp1.
f = Sa33.
f * Sa33.
f;
184 for (
int i = 0; i < 4; i++) {
185 Ssh.
f = Ss21.
f * 0.5f;
188 Stmp2.
f = Ssh.
f * Ssh.
f;
190 Ssh.
ui = Stmp1.
ui & Ssh.
ui;
191 Sch.
ui = Stmp1.
ui & Stmp5.
ui;
193 Sch.
ui = Sch.
ui | Stmp2.
ui;
195 Stmp1.
f = Ssh.
f * Ssh.
f;
196 Stmp2.
f = Sch.
f * Sch.
f;
200 Ssh.
f = Stmp4.
f * Ssh.
f;
201 Sch.
f = Stmp4.
f * Sch.
f;
203 Stmp1.
ui = (Stmp2.
f <= Stmp1.
f) ? 0xffffffff : 0;
206 Ssh.
ui = ~Stmp1.
ui & Ssh.
ui;
207 Ssh.
ui = Ssh.
ui | Stmp2.
ui;
209 Sch.
ui = ~Stmp1.
ui & Sch.
ui;
210 Sch.
ui = Sch.
ui | Stmp2.
ui;
212 Stmp1.
f = Ssh.
f * Ssh.
f;
213 Stmp2.
f = Sch.
f * Sch.
f;
215 Ss.
f = Sch.
f * Ssh.
f;
218 #ifdef DEBUG_JACOBI_CONJUGATE 219 printf(
"GPU s %.20g, c %.20g, sh %.20g, ch %.20g\n", Ss.
f, Sc.
f, Ssh.
f,
227 Ss33.
f = Ss33.
f * Stmp3.
f;
228 Ss31.
f = Ss31.
f * Stmp3.
f;
229 Ss32.
f = Ss32.
f * Stmp3.
f;
230 Ss33.
f = Ss33.
f * Stmp3.
f;
232 Stmp1.
f = Ss.
f * Ss31.
f;
233 Stmp2.
f = Ss.
f * Ss32.
f;
234 Ss31.
f = Sc.
f * Ss31.
f;
235 Ss32.
f = Sc.
f * Ss32.
f;
239 Stmp2.
f = Ss.
f * Ss.
f;
240 Stmp1.
f = Ss22.
f * Stmp2.
f;
241 Stmp3.
f = Ss11.
f * Stmp2.
f;
242 Stmp4.
f = Sc.
f * Sc.
f;
243 Ss11.
f = Ss11.
f * Stmp4.
f;
244 Ss22.
f = Ss22.
f * Stmp4.
f;
249 Ss21.
f = Ss21.
f * Stmp4.
f;
250 Stmp4.
f = Sc.
f * Ss.
f;
251 Stmp2.
f = Stmp2.
f * Stmp4.
f;
252 Stmp5.
f = Stmp5.
f * Stmp4.
f;
257 #ifdef DEBUG_JACOBI_CONJUGATE 258 printf(
"%.20g\n", Ss11.
f);
259 printf(
"%.20g %.20g\n", Ss21.
f, Ss22.
f);
260 printf(
"%.20g %.20g %.20g\n", Ss31.
f, Ss32.
f, Ss33.
f);
267 Stmp1.
f = Ssh.
f * Sqvvx.
f;
268 Stmp2.
f = Ssh.
f * Sqvvy.
f;
269 Stmp3.
f = Ssh.
f * Sqvvz.
f;
270 Ssh.
f = Ssh.
f * Sqvs.
f;
272 Sqvs.
f = Sch.
f * Sqvs.
f;
273 Sqvvx.
f = Sch.
f * Sqvvx.
f;
274 Sqvvy.
f = Sch.
f * Sqvvy.
f;
275 Sqvvz.
f = Sch.
f * Sqvvz.
f;
282 #ifdef DEBUG_JACOBI_CONJUGATE 283 printf(
"GPU q %.20g %.20g %.20g %.20g\n", Sqvvx.
f, Sqvvy.
f, Sqvvz.
f,
290 Ssh.
f = Ss32.
f * 0.5f;
293 Stmp2.
f = Ssh.
f * Ssh.
f;
295 Ssh.
ui = Stmp1.
ui & Ssh.
ui;
296 Sch.
ui = Stmp1.
ui & Stmp5.
ui;
298 Sch.
ui = Sch.
ui | Stmp2.
ui;
300 Stmp1.
f = Ssh.
f * Ssh.
f;
301 Stmp2.
f = Sch.
f * Sch.
f;
305 Ssh.
f = Stmp4.
f * Ssh.
f;
306 Sch.
f = Stmp4.
f * Sch.
f;
308 Stmp1.
ui = (Stmp2.
f <= Stmp1.
f) ? 0xffffffff : 0;
311 Ssh.
ui = ~Stmp1.
ui & Ssh.
ui;
312 Ssh.
ui = Ssh.
ui | Stmp2.
ui;
314 Sch.
ui = ~Stmp1.
ui & Sch.
ui;
315 Sch.
ui = Sch.
ui | Stmp2.
ui;
317 Stmp1.
f = Ssh.
f * Ssh.
f;
318 Stmp2.
f = Sch.
f * Sch.
f;
320 Ss.
f = Sch.
f * Ssh.
f;
323 #ifdef DEBUG_JACOBI_CONJUGATE 324 printf(
"GPU s %.20g, c %.20g, sh %.20g, ch %.20g\n", Ss.
f, Sc.
f, Ssh.
f,
333 Ss11.
f = Ss11.
f * Stmp3.
f;
334 Ss21.
f = Ss21.
f * Stmp3.
f;
335 Ss31.
f = Ss31.
f * Stmp3.
f;
336 Ss11.
f = Ss11.
f * Stmp3.
f;
338 Stmp1.
f = Ss.
f * Ss21.
f;
339 Stmp2.
f = Ss.
f * Ss31.
f;
340 Ss21.
f = Sc.
f * Ss21.
f;
341 Ss31.
f = Sc.
f * Ss31.
f;
345 Stmp2.
f = Ss.
f * Ss.
f;
346 Stmp1.
f = Ss33.
f * Stmp2.
f;
347 Stmp3.
f = Ss22.
f * Stmp2.
f;
348 Stmp4.
f = Sc.
f * Sc.
f;
349 Ss22.
f = Ss22.
f * Stmp4.
f;
350 Ss33.
f = Ss33.
f * Stmp4.
f;
355 Ss32.
f = Ss32.
f * Stmp4.
f;
356 Stmp4.
f = Sc.
f * Ss.
f;
357 Stmp2.
f = Stmp2.
f * Stmp4.
f;
358 Stmp5.
f = Stmp5.
f * Stmp4.
f;
363 #ifdef DEBUG_JACOBI_CONJUGATE 364 printf(
"%.20g\n", Ss11.
f);
365 printf(
"%.20g %.20g\n", Ss21.
f, Ss22.
f);
366 printf(
"%.20g %.20g %.20g\n", Ss31.
f, Ss32.
f, Ss33.
f);
373 Stmp1.
f = Ssh.
f * Sqvvx.
f;
374 Stmp2.
f = Ssh.
f * Sqvvy.
f;
375 Stmp3.
f = Ssh.
f * Sqvvz.
f;
376 Ssh.
f = Ssh.
f * Sqvs.
f;
378 Sqvs.
f = Sch.
f * Sqvs.
f;
379 Sqvvx.
f = Sch.
f * Sqvvx.
f;
380 Sqvvy.
f = Sch.
f * Sqvvy.
f;
381 Sqvvz.
f = Sch.
f * Sqvvz.
f;
388 #ifdef DEBUG_JACOBI_CONJUGATE 389 printf(
"GPU q %.20g %.20g %.20g %.20g\n", Sqvvx.
f, Sqvvy.
f, Sqvvz.
f,
397 Ssh.
f = Ss31.
f * 0.5f;
400 Stmp2.
f = Ssh.
f * Ssh.
f;
402 Ssh.
ui = Stmp1.
ui & Ssh.
ui;
403 Sch.
ui = Stmp1.
ui & Stmp5.
ui;
405 Sch.
ui = Sch.
ui | Stmp2.
ui;
407 Stmp1.
f = Ssh.
f * Ssh.
f;
408 Stmp2.
f = Sch.
f * Sch.
f;
412 Ssh.
f = Stmp4.
f * Ssh.
f;
413 Sch.
f = Stmp4.
f * Sch.
f;
415 Stmp1.
ui = (Stmp2.
f <= Stmp1.
f) ? 0xffffffff : 0;
418 Ssh.
ui = ~Stmp1.
ui & Ssh.
ui;
419 Ssh.
ui = Ssh.
ui | Stmp2.
ui;
421 Sch.
ui = ~Stmp1.
ui & Sch.
ui;
422 Sch.
ui = Sch.
ui | Stmp2.
ui;
424 Stmp1.
f = Ssh.
f * Ssh.
f;
425 Stmp2.
f = Sch.
f * Sch.
f;
427 Ss.
f = Sch.
f * Ssh.
f;
430 #ifdef DEBUG_JACOBI_CONJUGATE 431 printf(
"GPU s %.20g, c %.20g, sh %.20g, ch %.20g\n", Ss.
f, Sc.
f, Ssh.
f,
440 Ss22.
f = Ss22.
f * Stmp3.
f;
441 Ss32.
f = Ss32.
f * Stmp3.
f;
442 Ss21.
f = Ss21.
f * Stmp3.
f;
443 Ss22.
f = Ss22.
f * Stmp3.
f;
445 Stmp1.
f = Ss.
f * Ss32.
f;
446 Stmp2.
f = Ss.
f * Ss21.
f;
447 Ss32.
f = Sc.
f * Ss32.
f;
448 Ss21.
f = Sc.
f * Ss21.
f;
452 Stmp2.
f = Ss.
f * Ss.
f;
453 Stmp1.
f = Ss11.
f * Stmp2.
f;
454 Stmp3.
f = Ss33.
f * Stmp2.
f;
455 Stmp4.
f = Sc.
f * Sc.
f;
456 Ss33.
f = Ss33.
f * Stmp4.
f;
457 Ss11.
f = Ss11.
f * Stmp4.
f;
462 Ss31.
f = Ss31.
f * Stmp4.
f;
463 Stmp4.
f = Sc.
f * Ss.
f;
464 Stmp2.
f = Stmp2.
f * Stmp4.
f;
465 Stmp5.
f = Stmp5.
f * Stmp4.
f;
470 #ifdef DEBUG_JACOBI_CONJUGATE 471 printf(
"%.20g\n", Ss11.
f);
472 printf(
"%.20g %.20g\n", Ss21.
f, Ss22.
f);
473 printf(
"%.20g %.20g %.20g\n", Ss31.
f, Ss32.
f, Ss33.
f);
480 Stmp1.
f = Ssh.
f * Sqvvx.
f;
481 Stmp2.
f = Ssh.
f * Sqvvy.
f;
482 Stmp3.
f = Ssh.
f * Sqvvz.
f;
483 Ssh.
f = Ssh.
f * Sqvs.
f;
485 Sqvs.
f = Sch.
f * Sqvs.
f;
486 Sqvvx.
f = Sch.
f * Sqvvx.
f;
487 Sqvvy.
f = Sch.
f * Sqvvy.
f;
488 Sqvvz.
f = Sch.
f * Sqvvz.
f;
501 Stmp2.
f = Sqvs.
f * Sqvs.
f;
502 Stmp1.
f = Sqvvx.
f * Sqvvx.
f;
504 Stmp1.
f = Sqvvy.
f * Sqvvy.
f;
506 Stmp1.
f = Sqvvz.
f * Sqvvz.
f;
510 Stmp4.
f = Stmp1.
f * 0.5f;
511 Stmp3.
f = Stmp1.
f * Stmp4.
f;
512 Stmp3.
f = Stmp1.
f * Stmp3.
f;
513 Stmp3.
f = Stmp2.
f * Stmp3.
f;
517 Sqvs.
f = Sqvs.
f * Stmp1.
f;
518 Sqvvx.
f = Sqvvx.
f * Stmp1.
f;
519 Sqvvy.
f = Sqvvy.
f * Stmp1.
f;
520 Sqvvz.
f = Sqvvz.
f * Stmp1.
f;
526 Stmp1.
f = Sqvvx.
f * Sqvvx.
f;
527 Stmp2.
f = Sqvvy.
f * Sqvvy.
f;
528 Stmp3.
f = Sqvvz.
f * Sqvvz.
f;
529 Sv11.
f = Sqvs.
f * Sqvs.
f;
541 Sv32.
f = Sqvs.
f * Stmp1.
f;
542 Sv13.
f = Sqvs.
f * Stmp2.
f;
543 Sv21.
f = Sqvs.
f * Stmp3.
f;
544 Stmp1.
f = Sqvvy.
f * Stmp1.
f;
545 Stmp2.
f = Sqvvz.
f * Stmp2.
f;
546 Stmp3.
f = Sqvvx.
f * Stmp3.
f;
560 Sa12.
f = Sv12.
f * Sa11.
f;
561 Sa13.
f = Sv13.
f * Sa11.
f;
562 Sa11.
f = Sv11.
f * Sa11.
f;
563 Stmp1.
f = Sv21.
f * Stmp2.
f;
565 Stmp1.
f = Sv31.
f * Stmp3.
f;
567 Stmp1.
f = Sv22.
f * Stmp2.
f;
569 Stmp1.
f = Sv32.
f * Stmp3.
f;
571 Stmp1.
f = Sv23.
f * Stmp2.
f;
573 Stmp1.
f = Sv33.
f * Stmp3.
f;
578 Sa22.
f = Sv12.
f * Sa21.
f;
579 Sa23.
f = Sv13.
f * Sa21.
f;
580 Sa21.
f = Sv11.
f * Sa21.
f;
581 Stmp1.
f = Sv21.
f * Stmp2.
f;
583 Stmp1.
f = Sv31.
f * Stmp3.
f;
585 Stmp1.
f = Sv22.
f * Stmp2.
f;
587 Stmp1.
f = Sv32.
f * Stmp3.
f;
589 Stmp1.
f = Sv23.
f * Stmp2.
f;
591 Stmp1.
f = Sv33.
f * Stmp3.
f;
596 Sa32.
f = Sv12.
f * Sa31.
f;
597 Sa33.
f = Sv13.
f * Sa31.
f;
598 Sa31.
f = Sv11.
f * Sa31.
f;
599 Stmp1.
f = Sv21.
f * Stmp2.
f;
601 Stmp1.
f = Sv31.
f * Stmp3.
f;
603 Stmp1.
f = Sv22.
f * Stmp2.
f;
605 Stmp1.
f = Sv32.
f * Stmp3.
f;
607 Stmp1.
f = Sv23.
f * Stmp2.
f;
609 Stmp1.
f = Sv33.
f * Stmp3.
f;
616 Stmp1.
f = Sa11.
f * Sa11.
f;
617 Stmp4.
f = Sa21.
f * Sa21.
f;
619 Stmp4.
f = Sa31.
f * Sa31.
f;
622 Stmp2.
f = Sa12.
f * Sa12.
f;
623 Stmp4.
f = Sa22.
f * Sa22.
f;
625 Stmp4.
f = Sa32.
f * Sa32.
f;
628 Stmp3.
f = Sa13.
f * Sa13.
f;
629 Stmp4.
f = Sa23.
f * Sa23.
f;
631 Stmp4.
f = Sa33.
f * Sa33.
f;
636 Stmp4.
ui = (Stmp1.
f < Stmp2.
f) ? 0xffffffff : 0;
637 Stmp5.
ui = Sa11.
ui ^ Sa12.
ui;
638 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
639 Sa11.
ui = Sa11.
ui ^ Stmp5.
ui;
640 Sa12.
ui = Sa12.
ui ^ Stmp5.
ui;
642 Stmp5.
ui = Sa21.
ui ^ Sa22.
ui;
643 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
644 Sa21.
ui = Sa21.
ui ^ Stmp5.
ui;
645 Sa22.
ui = Sa22.
ui ^ Stmp5.
ui;
647 Stmp5.
ui = Sa31.
ui ^ Sa32.
ui;
648 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
649 Sa31.
ui = Sa31.
ui ^ Stmp5.
ui;
650 Sa32.
ui = Sa32.
ui ^ Stmp5.
ui;
652 Stmp5.
ui = Sv11.
ui ^ Sv12.
ui;
653 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
654 Sv11.
ui = Sv11.
ui ^ Stmp5.
ui;
655 Sv12.
ui = Sv12.
ui ^ Stmp5.
ui;
657 Stmp5.
ui = Sv21.
ui ^ Sv22.
ui;
658 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
659 Sv21.
ui = Sv21.
ui ^ Stmp5.
ui;
660 Sv22.
ui = Sv22.
ui ^ Stmp5.
ui;
662 Stmp5.
ui = Sv31.
ui ^ Sv32.
ui;
663 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
664 Sv31.
ui = Sv31.
ui ^ Stmp5.
ui;
665 Sv32.
ui = Sv32.
ui ^ Stmp5.
ui;
667 Stmp5.
ui = Stmp1.
ui ^ Stmp2.
ui;
668 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
669 Stmp1.
ui = Stmp1.
ui ^ Stmp5.
ui;
670 Stmp2.
ui = Stmp2.
ui ^ Stmp5.
ui;
676 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
680 Sa12.
f = Sa12.
f * Stmp4.
f;
681 Sa22.
f = Sa22.
f * Stmp4.
f;
682 Sa32.
f = Sa32.
f * Stmp4.
f;
684 Sv12.
f = Sv12.
f * Stmp4.
f;
685 Sv22.
f = Sv22.
f * Stmp4.
f;
686 Sv32.
f = Sv32.
f * Stmp4.
f;
690 Stmp4.
ui = (Stmp1.
f < Stmp3.
f) ? 0xffffffff : 0;
691 Stmp5.
ui = Sa11.
ui ^ Sa13.
ui;
692 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
693 Sa11.
ui = Sa11.
ui ^ Stmp5.
ui;
694 Sa13.
ui = Sa13.
ui ^ Stmp5.
ui;
696 Stmp5.
ui = Sa21.
ui ^ Sa23.
ui;
697 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
698 Sa21.
ui = Sa21.
ui ^ Stmp5.
ui;
699 Sa23.
ui = Sa23.
ui ^ Stmp5.
ui;
701 Stmp5.
ui = Sa31.
ui ^ Sa33.
ui;
702 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
703 Sa31.
ui = Sa31.
ui ^ Stmp5.
ui;
704 Sa33.
ui = Sa33.
ui ^ Stmp5.
ui;
706 Stmp5.
ui = Sv11.
ui ^ Sv13.
ui;
707 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
708 Sv11.
ui = Sv11.
ui ^ Stmp5.
ui;
709 Sv13.
ui = Sv13.
ui ^ Stmp5.
ui;
711 Stmp5.
ui = Sv21.
ui ^ Sv23.
ui;
712 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
713 Sv21.
ui = Sv21.
ui ^ Stmp5.
ui;
714 Sv23.
ui = Sv23.
ui ^ Stmp5.
ui;
716 Stmp5.
ui = Sv31.
ui ^ Sv33.
ui;
717 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
718 Sv31.
ui = Sv31.
ui ^ Stmp5.
ui;
719 Sv33.
ui = Sv33.
ui ^ Stmp5.
ui;
721 Stmp5.
ui = Stmp1.
ui ^ Stmp3.
ui;
722 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
723 Stmp1.
ui = Stmp1.
ui ^ Stmp5.
ui;
724 Stmp3.
ui = Stmp3.
ui ^ Stmp5.
ui;
730 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
734 Sa11.
f = Sa11.
f * Stmp4.
f;
735 Sa21.
f = Sa21.
f * Stmp4.
f;
736 Sa31.
f = Sa31.
f * Stmp4.
f;
738 Sv11.
f = Sv11.
f * Stmp4.
f;
739 Sv21.
f = Sv21.
f * Stmp4.
f;
740 Sv31.
f = Sv31.
f * Stmp4.
f;
744 Stmp4.
ui = (Stmp2.
f < Stmp3.
f) ? 0xffffffff : 0;
745 Stmp5.
ui = Sa12.
ui ^ Sa13.
ui;
746 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
747 Sa12.
ui = Sa12.
ui ^ Stmp5.
ui;
748 Sa13.
ui = Sa13.
ui ^ Stmp5.
ui;
750 Stmp5.
ui = Sa22.
ui ^ Sa23.
ui;
751 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
752 Sa22.
ui = Sa22.
ui ^ Stmp5.
ui;
753 Sa23.
ui = Sa23.
ui ^ Stmp5.
ui;
755 Stmp5.
ui = Sa32.
ui ^ Sa33.
ui;
756 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
757 Sa32.
ui = Sa32.
ui ^ Stmp5.
ui;
758 Sa33.
ui = Sa33.
ui ^ Stmp5.
ui;
760 Stmp5.
ui = Sv12.
ui ^ Sv13.
ui;
761 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
762 Sv12.
ui = Sv12.
ui ^ Stmp5.
ui;
763 Sv13.
ui = Sv13.
ui ^ Stmp5.
ui;
765 Stmp5.
ui = Sv22.
ui ^ Sv23.
ui;
766 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
767 Sv22.
ui = Sv22.
ui ^ Stmp5.
ui;
768 Sv23.
ui = Sv23.
ui ^ Stmp5.
ui;
770 Stmp5.
ui = Sv32.
ui ^ Sv33.
ui;
771 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
772 Sv32.
ui = Sv32.
ui ^ Stmp5.
ui;
773 Sv33.
ui = Sv33.
ui ^ Stmp5.
ui;
775 Stmp5.
ui = Stmp2.
ui ^ Stmp3.
ui;
776 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
777 Stmp2.
ui = Stmp2.
ui ^ Stmp5.
ui;
778 Stmp3.
ui = Stmp3.
ui ^ Stmp5.
ui;
784 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
788 Sa13.
f = Sa13.
f * Stmp4.
f;
789 Sa23.
f = Sa23.
f * Stmp4.
f;
790 Sa33.
f = Sa33.
f * Stmp4.
f;
792 Sv13.
f = Sv13.
f * Stmp4.
f;
793 Sv23.
f = Sv23.
f * Stmp4.
f;
794 Sv33.
f = Sv33.
f * Stmp4.
f;
810 Ssh.
f = Sa21.
f * Sa21.
f;
811 Ssh.
ui = (Ssh.
f >= gsmall_number) ? 0xffffffff : 0;
812 Ssh.
ui = Ssh.
ui & Sa21.
ui;
816 Sch.
f = max(Sch.
f, Sa11.
f);
817 Sch.
f = max(Sch.
f, gsmall_number);
818 Stmp5.
ui = (Sa11.
f >= Stmp5.
f) ? 0xffffffff : 0;
820 Stmp1.
f = Sch.
f * Sch.
f;
821 Stmp2.
f = Ssh.
f * Ssh.
f;
825 Stmp4.
f = Stmp1.
f * 0.5f;
826 Stmp3.
f = Stmp1.
f * Stmp4.
f;
827 Stmp3.
f = Stmp1.
f * Stmp3.
f;
828 Stmp3.
f = Stmp2.
f * Stmp3.
f;
831 Stmp1.
f = Stmp1.
f * Stmp2.
f;
835 Stmp1.
ui = ~Stmp5.
ui & Ssh.
ui;
836 Stmp2.
ui = ~Stmp5.
ui & Sch.
ui;
837 Sch.
ui = Stmp5.
ui & Sch.
ui;
838 Ssh.
ui = Stmp5.
ui & Ssh.
ui;
839 Sch.
ui = Sch.
ui | Stmp1.
ui;
840 Ssh.
ui = Ssh.
ui | Stmp2.
ui;
842 Stmp1.
f = Sch.
f * Sch.
f;
843 Stmp2.
f = Ssh.
f * Ssh.
f;
847 Stmp4.
f = Stmp1.
f * 0.5f;
848 Stmp3.
f = Stmp1.
f * Stmp4.
f;
849 Stmp3.
f = Stmp1.
f * Stmp3.
f;
850 Stmp3.
f = Stmp2.
f * Stmp3.
f;
854 Sch.
f = Sch.
f * Stmp1.
f;
855 Ssh.
f = Ssh.
f * Stmp1.
f;
857 Sc.
f = Sch.
f * Sch.
f;
858 Ss.
f = Ssh.
f * Ssh.
f;
860 Ss.
f = Ssh.
f * Sch.
f;
867 Stmp1.
f = Ss.
f * Sa11.
f;
868 Stmp2.
f = Ss.
f * Sa21.
f;
869 Sa11.
f = Sc.
f * Sa11.
f;
870 Sa21.
f = Sc.
f * Sa21.
f;
874 Stmp1.
f = Ss.
f * Sa12.
f;
875 Stmp2.
f = Ss.
f * Sa22.
f;
876 Sa12.
f = Sc.
f * Sa12.
f;
877 Sa22.
f = Sc.
f * Sa22.
f;
881 Stmp1.
f = Ss.
f * Sa13.
f;
882 Stmp2.
f = Ss.
f * Sa23.
f;
883 Sa13.
f = Sc.
f * Sa13.
f;
884 Sa23.
f = Sc.
f * Sa23.
f;
892 Stmp1.
f = Ss.
f * Su11.
f;
893 Stmp2.
f = Ss.
f * Su12.
f;
894 Su11.
f = Sc.
f * Su11.
f;
895 Su12.
f = Sc.
f * Su12.
f;
899 Stmp1.
f = Ss.
f * Su21.
f;
900 Stmp2.
f = Ss.
f * Su22.
f;
901 Su21.
f = Sc.
f * Su21.
f;
902 Su22.
f = Sc.
f * Su22.
f;
906 Stmp1.
f = Ss.
f * Su31.
f;
907 Stmp2.
f = Ss.
f * Su32.
f;
908 Su31.
f = Sc.
f * Su31.
f;
909 Su32.
f = Sc.
f * Su32.
f;
915 Ssh.
f = Sa31.
f * Sa31.
f;
916 Ssh.
ui = (Ssh.
f >= gsmall_number) ? 0xffffffff : 0;
917 Ssh.
ui = Ssh.
ui & Sa31.
ui;
921 Sch.
f = max(Sch.
f, Sa11.
f);
922 Sch.
f = max(Sch.
f, gsmall_number);
923 Stmp5.
ui = (Sa11.
f >= Stmp5.
f) ? 0xffffffff : 0;
925 Stmp1.
f = Sch.
f * Sch.
f;
926 Stmp2.
f = Ssh.
f * Ssh.
f;
930 Stmp4.
f = Stmp1.
f * 0.5;
931 Stmp3.
f = Stmp1.
f * Stmp4.
f;
932 Stmp3.
f = Stmp1.
f * Stmp3.
f;
933 Stmp3.
f = Stmp2.
f * Stmp3.
f;
936 Stmp1.
f = Stmp1.
f * Stmp2.
f;
940 Stmp1.
ui = ~Stmp5.
ui & Ssh.
ui;
941 Stmp2.
ui = ~Stmp5.
ui & Sch.
ui;
942 Sch.
ui = Stmp5.
ui & Sch.
ui;
943 Ssh.
ui = Stmp5.
ui & Ssh.
ui;
944 Sch.
ui = Sch.
ui | Stmp1.
ui;
945 Ssh.
ui = Ssh.
ui | Stmp2.
ui;
947 Stmp1.
f = Sch.
f * Sch.
f;
948 Stmp2.
f = Ssh.
f * Ssh.
f;
952 Stmp4.
f = Stmp1.
f * 0.5f;
953 Stmp3.
f = Stmp1.
f * Stmp4.
f;
954 Stmp3.
f = Stmp1.
f * Stmp3.
f;
955 Stmp3.
f = Stmp2.
f * Stmp3.
f;
959 Sch.
f = Sch.
f * Stmp1.
f;
960 Ssh.
f = Ssh.
f * Stmp1.
f;
962 Sc.
f = Sch.
f * Sch.
f;
963 Ss.
f = Ssh.
f * Ssh.
f;
965 Ss.
f = Ssh.
f * Sch.
f;
972 Stmp1.
f = Ss.
f * Sa11.
f;
973 Stmp2.
f = Ss.
f * Sa31.
f;
974 Sa11.
f = Sc.
f * Sa11.
f;
975 Sa31.
f = Sc.
f * Sa31.
f;
979 Stmp1.
f = Ss.
f * Sa12.
f;
980 Stmp2.
f = Ss.
f * Sa32.
f;
981 Sa12.
f = Sc.
f * Sa12.
f;
982 Sa32.
f = Sc.
f * Sa32.
f;
986 Stmp1.
f = Ss.
f * Sa13.
f;
987 Stmp2.
f = Ss.
f * Sa33.
f;
988 Sa13.
f = Sc.
f * Sa13.
f;
989 Sa33.
f = Sc.
f * Sa33.
f;
997 Stmp1.
f = Ss.
f * Su11.
f;
998 Stmp2.
f = Ss.
f * Su13.
f;
999 Su11.
f = Sc.
f * Su11.
f;
1000 Su13.
f = Sc.
f * Su13.
f;
1004 Stmp1.
f = Ss.
f * Su21.
f;
1005 Stmp2.
f = Ss.
f * Su23.
f;
1006 Su21.
f = Sc.
f * Su21.
f;
1007 Su23.
f = Sc.
f * Su23.
f;
1011 Stmp1.
f = Ss.
f * Su31.
f;
1012 Stmp2.
f = Ss.
f * Su33.
f;
1013 Su31.
f = Sc.
f * Su31.
f;
1014 Su33.
f = Sc.
f * Su33.
f;
1020 Ssh.
f = Sa32.
f * Sa32.
f;
1021 Ssh.
ui = (Ssh.
f >= gsmall_number) ? 0xffffffff : 0;
1022 Ssh.
ui = Ssh.
ui & Sa32.
ui;
1026 Sch.
f = max(Sch.
f, Sa22.
f);
1027 Sch.
f = max(Sch.
f, gsmall_number);
1028 Stmp5.
ui = (Sa22.
f >= Stmp5.
f) ? 0xffffffff : 0;
1030 Stmp1.
f = Sch.
f * Sch.
f;
1031 Stmp2.
f = Ssh.
f * Ssh.
f;
1035 Stmp4.
f = Stmp1.
f * 0.5f;
1036 Stmp3.
f = Stmp1.
f * Stmp4.
f;
1037 Stmp3.
f = Stmp1.
f * Stmp3.
f;
1038 Stmp3.
f = Stmp2.
f * Stmp3.
f;
1041 Stmp1.
f = Stmp1.
f * Stmp2.
f;
1045 Stmp1.
ui = ~Stmp5.
ui & Ssh.
ui;
1046 Stmp2.
ui = ~Stmp5.
ui & Sch.
ui;
1047 Sch.
ui = Stmp5.
ui & Sch.
ui;
1048 Ssh.
ui = Stmp5.
ui & Ssh.
ui;
1049 Sch.
ui = Sch.
ui | Stmp1.
ui;
1050 Ssh.
ui = Ssh.
ui | Stmp2.
ui;
1052 Stmp1.
f = Sch.
f * Sch.
f;
1053 Stmp2.
f = Ssh.
f * Ssh.
f;
1057 Stmp4.
f = Stmp1.
f * 0.5f;
1058 Stmp3.
f = Stmp1.
f * Stmp4.
f;
1059 Stmp3.
f = Stmp1.
f * Stmp3.
f;
1060 Stmp3.
f = Stmp2.
f * Stmp3.
f;
1064 Sch.
f = Sch.
f * Stmp1.
f;
1065 Ssh.
f = Ssh.
f * Stmp1.
f;
1067 Sc.
f = Sch.
f * Sch.
f;
1068 Ss.
f = Ssh.
f * Ssh.
f;
1070 Ss.
f = Ssh.
f * Sch.
f;
1077 Stmp1.
f = Ss.
f * Sa21.
f;
1078 Stmp2.
f = Ss.
f * Sa31.
f;
1079 Sa21.
f = Sc.
f * Sa21.
f;
1080 Sa31.
f = Sc.
f * Sa31.
f;
1084 Stmp1.
f = Ss.
f * Sa22.
f;
1085 Stmp2.
f = Ss.
f * Sa32.
f;
1086 Sa22.
f = Sc.
f * Sa22.
f;
1087 Sa32.
f = Sc.
f * Sa32.
f;
1091 Stmp1.
f = Ss.
f * Sa23.
f;
1092 Stmp2.
f = Ss.
f * Sa33.
f;
1093 Sa23.
f = Sc.
f * Sa23.
f;
1094 Sa33.
f = Sc.
f * Sa33.
f;
1102 Stmp1.
f = Ss.
f * Su12.
f;
1103 Stmp2.
f = Ss.
f * Su13.
f;
1104 Su12.
f = Sc.
f * Su12.
f;
1105 Su13.
f = Sc.
f * Su13.
f;
1109 Stmp1.
f = Ss.
f * Su22.
f;
1110 Stmp2.
f = Ss.
f * Su23.
f;
1111 Su22.
f = Sc.
f * Su22.
f;
1112 Su23.
f = Sc.
f * Su23.
f;
1116 Stmp1.
f = Ss.
f * Su32.
f;
1117 Stmp2.
f = Ss.
f * Su33.
f;
1118 Su32.
f = Sc.
f * Su32.
f;
1119 Su33.
f = Sc.
f * Su33.
f;
1155 float gsmall_number = 1.e-12;
1157 un<float> Sa11, Sa21, Sa31, Sa12, Sa22, Sa32, Sa13, Sa23, Sa33;
1158 un<float> Su11, Su21, Su31, Su12, Su22, Su32, Su13, Su23, Su33;
1159 un<float> Sv11, Sv21, Sv31, Sv12, Sv22, Sv32, Sv13, Sv23, Sv33;
1161 un<float> Stmp1, Stmp2, Stmp3, Stmp4, Stmp5;
1162 un<float> Ss11, Ss21, Ss31, Ss22, Ss32, Ss33;
1179 Ss11.
f = Sa11.
f * Sa11.
f;
1180 Stmp1.
f = Sa21.
f * Sa21.
f;
1182 Stmp1.
f = Sa31.
f * Sa31.
f;
1185 Ss21.
f = Sa12.
f * Sa11.
f;
1186 Stmp1.
f = Sa22.
f * Sa21.
f;
1188 Stmp1.
f = Sa32.
f * Sa31.
f;
1191 Ss31.
f = Sa13.
f * Sa11.
f;
1192 Stmp1.
f = Sa23.
f * Sa21.
f;
1194 Stmp1.
f = Sa33.
f * Sa31.
f;
1197 Ss22.
f = Sa12.
f * Sa12.
f;
1198 Stmp1.
f = Sa22.
f * Sa22.
f;
1200 Stmp1.
f = Sa32.
f * Sa32.
f;
1203 Ss32.
f = Sa13.
f * Sa12.
f;
1204 Stmp1.
f = Sa23.
f * Sa22.
f;
1206 Stmp1.
f = Sa33.
f * Sa32.
f;
1209 Ss33.
f = Sa13.
f * Sa13.
f;
1210 Stmp1.
f = Sa23.
f * Sa23.
f;
1212 Stmp1.
f = Sa33.
f * Sa33.
f;
1223 for (
int i = 0; i < 4; i++) {
1224 Ssh.
f = Ss21.
f * 0.5f;
1227 Stmp2.
f = Ssh.
f * Ssh.
f;
1229 Ssh.
ui = Stmp1.
ui & Ssh.
ui;
1230 Sch.
ui = Stmp1.
ui & Stmp5.
ui;
1232 Sch.
ui = Sch.
ui | Stmp2.
ui;
1234 Stmp1.
f = Ssh.
f * Ssh.
f;
1235 Stmp2.
f = Sch.
f * Sch.
f;
1239 Ssh.
f = Stmp4.
f * Ssh.
f;
1240 Sch.
f = Stmp4.
f * Sch.
f;
1242 Stmp1.
ui = (Stmp2.
f <= Stmp1.
f) ? 0xffffffff : 0;
1245 Ssh.
ui = ~Stmp1.
ui & Ssh.
ui;
1246 Ssh.
ui = Ssh.
ui | Stmp2.
ui;
1248 Sch.
ui = ~Stmp1.
ui & Sch.
ui;
1249 Sch.
ui = Sch.
ui | Stmp2.
ui;
1251 Stmp1.
f = Ssh.
f * Ssh.
f;
1252 Stmp2.
f = Sch.
f * Sch.
f;
1254 Ss.
f = Sch.
f * Ssh.
f;
1257 #ifdef DEBUG_JACOBI_CONJUGATE 1258 printf(
"GPU s %.20g, c %.20g, sh %.20g, ch %.20g\n", Ss.
f, Sc.
f, Ssh.
f,
1266 Ss33.
f = Ss33.
f * Stmp3.
f;
1267 Ss31.
f = Ss31.
f * Stmp3.
f;
1268 Ss32.
f = Ss32.
f * Stmp3.
f;
1269 Ss33.
f = Ss33.
f * Stmp3.
f;
1271 Stmp1.
f = Ss.
f * Ss31.
f;
1272 Stmp2.
f = Ss.
f * Ss32.
f;
1273 Ss31.
f = Sc.
f * Ss31.
f;
1274 Ss32.
f = Sc.
f * Ss32.
f;
1278 Stmp2.
f = Ss.
f * Ss.
f;
1279 Stmp1.
f = Ss22.
f * Stmp2.
f;
1280 Stmp3.
f = Ss11.
f * Stmp2.
f;
1281 Stmp4.
f = Sc.
f * Sc.
f;
1282 Ss11.
f = Ss11.
f * Stmp4.
f;
1283 Ss22.
f = Ss22.
f * Stmp4.
f;
1288 Ss21.
f = Ss21.
f * Stmp4.
f;
1289 Stmp4.
f = Sc.
f * Ss.
f;
1290 Stmp2.
f = Stmp2.
f * Stmp4.
f;
1291 Stmp5.
f = Stmp5.
f * Stmp4.
f;
1296 #ifdef DEBUG_JACOBI_CONJUGATE 1297 printf(
"%.20g\n", Ss11.
f);
1298 printf(
"%.20g %.20g\n", Ss21.
f, Ss22.
f);
1299 printf(
"%.20g %.20g %.20g\n", Ss31.
f, Ss32.
f, Ss33.
f);
1306 Stmp1.
f = Ssh.
f * Sqvvx.
f;
1307 Stmp2.
f = Ssh.
f * Sqvvy.
f;
1308 Stmp3.
f = Ssh.
f * Sqvvz.
f;
1309 Ssh.
f = Ssh.
f * Sqvs.
f;
1311 Sqvs.
f = Sch.
f * Sqvs.
f;
1312 Sqvvx.
f = Sch.
f * Sqvvx.
f;
1313 Sqvvy.
f = Sch.
f * Sqvvy.
f;
1314 Sqvvz.
f = Sch.
f * Sqvvz.
f;
1321 #ifdef DEBUG_JACOBI_CONJUGATE 1322 printf(
"GPU q %.20g %.20g %.20g %.20g\n", Sqvvx.
f, Sqvvy.
f, Sqvvz.
f,
1329 Ssh.
f = Ss32.
f * 0.5f;
1332 Stmp2.
f = Ssh.
f * Ssh.
f;
1334 Ssh.
ui = Stmp1.
ui & Ssh.
ui;
1335 Sch.
ui = Stmp1.
ui & Stmp5.
ui;
1337 Sch.
ui = Sch.
ui | Stmp2.
ui;
1339 Stmp1.
f = Ssh.
f * Ssh.
f;
1340 Stmp2.
f = Sch.
f * Sch.
f;
1344 Ssh.
f = Stmp4.
f * Ssh.
f;
1345 Sch.
f = Stmp4.
f * Sch.
f;
1347 Stmp1.
ui = (Stmp2.
f <= Stmp1.
f) ? 0xffffffff : 0;
1350 Ssh.
ui = ~Stmp1.
ui & Ssh.
ui;
1351 Ssh.
ui = Ssh.
ui | Stmp2.
ui;
1353 Sch.
ui = ~Stmp1.
ui & Sch.
ui;
1354 Sch.
ui = Sch.
ui | Stmp2.
ui;
1356 Stmp1.
f = Ssh.
f * Ssh.
f;
1357 Stmp2.
f = Sch.
f * Sch.
f;
1359 Ss.
f = Sch.
f * Ssh.
f;
1362 #ifdef DEBUG_JACOBI_CONJUGATE 1363 printf(
"GPU s %.20g, c %.20g, sh %.20g, ch %.20g\n", Ss.
f, Sc.
f, Ssh.
f,
1372 Ss11.
f = Ss11.
f * Stmp3.
f;
1373 Ss21.
f = Ss21.
f * Stmp3.
f;
1374 Ss31.
f = Ss31.
f * Stmp3.
f;
1375 Ss11.
f = Ss11.
f * Stmp3.
f;
1377 Stmp1.
f = Ss.
f * Ss21.
f;
1378 Stmp2.
f = Ss.
f * Ss31.
f;
1379 Ss21.
f = Sc.
f * Ss21.
f;
1380 Ss31.
f = Sc.
f * Ss31.
f;
1384 Stmp2.
f = Ss.
f * Ss.
f;
1385 Stmp1.
f = Ss33.
f * Stmp2.
f;
1386 Stmp3.
f = Ss22.
f * Stmp2.
f;
1387 Stmp4.
f = Sc.
f * Sc.
f;
1388 Ss22.
f = Ss22.
f * Stmp4.
f;
1389 Ss33.
f = Ss33.
f * Stmp4.
f;
1394 Ss32.
f = Ss32.
f * Stmp4.
f;
1395 Stmp4.
f = Sc.
f * Ss.
f;
1396 Stmp2.
f = Stmp2.
f * Stmp4.
f;
1397 Stmp5.
f = Stmp5.
f * Stmp4.
f;
1402 #ifdef DEBUG_JACOBI_CONJUGATE 1403 printf(
"%.20g\n", Ss11.
f);
1404 printf(
"%.20g %.20g\n", Ss21.
f, Ss22.
f);
1405 printf(
"%.20g %.20g %.20g\n", Ss31.
f, Ss32.
f, Ss33.
f);
1412 Stmp1.
f = Ssh.
f * Sqvvx.
f;
1413 Stmp2.
f = Ssh.
f * Sqvvy.
f;
1414 Stmp3.
f = Ssh.
f * Sqvvz.
f;
1415 Ssh.
f = Ssh.
f * Sqvs.
f;
1417 Sqvs.
f = Sch.
f * Sqvs.
f;
1418 Sqvvx.
f = Sch.
f * Sqvvx.
f;
1419 Sqvvy.
f = Sch.
f * Sqvvy.
f;
1420 Sqvvz.
f = Sch.
f * Sqvvz.
f;
1427 #ifdef DEBUG_JACOBI_CONJUGATE 1428 printf(
"GPU q %.20g %.20g %.20g %.20g\n", Sqvvx.
f, Sqvvy.
f, Sqvvz.
f,
1436 Ssh.
f = Ss31.
f * 0.5f;
1439 Stmp2.
f = Ssh.
f * Ssh.
f;
1441 Ssh.
ui = Stmp1.
ui & Ssh.
ui;
1442 Sch.
ui = Stmp1.
ui & Stmp5.
ui;
1444 Sch.
ui = Sch.
ui | Stmp2.
ui;
1446 Stmp1.
f = Ssh.
f * Ssh.
f;
1447 Stmp2.
f = Sch.
f * Sch.
f;
1451 Ssh.
f = Stmp4.
f * Ssh.
f;
1452 Sch.
f = Stmp4.
f * Sch.
f;
1454 Stmp1.
ui = (Stmp2.
f <= Stmp1.
f) ? 0xffffffff : 0;
1457 Ssh.
ui = ~Stmp1.
ui & Ssh.
ui;
1458 Ssh.
ui = Ssh.
ui | Stmp2.
ui;
1460 Sch.
ui = ~Stmp1.
ui & Sch.
ui;
1461 Sch.
ui = Sch.
ui | Stmp2.
ui;
1463 Stmp1.
f = Ssh.
f * Ssh.
f;
1464 Stmp2.
f = Sch.
f * Sch.
f;
1466 Ss.
f = Sch.
f * Ssh.
f;
1469 #ifdef DEBUG_JACOBI_CONJUGATE 1470 printf(
"GPU s %.20g, c %.20g, sh %.20g, ch %.20g\n", Ss.
f, Sc.
f, Ssh.
f,
1479 Ss22.
f = Ss22.
f * Stmp3.
f;
1480 Ss32.
f = Ss32.
f * Stmp3.
f;
1481 Ss21.
f = Ss21.
f * Stmp3.
f;
1482 Ss22.
f = Ss22.
f * Stmp3.
f;
1484 Stmp1.
f = Ss.
f * Ss32.
f;
1485 Stmp2.
f = Ss.
f * Ss21.
f;
1486 Ss32.
f = Sc.
f * Ss32.
f;
1487 Ss21.
f = Sc.
f * Ss21.
f;
1491 Stmp2.
f = Ss.
f * Ss.
f;
1492 Stmp1.
f = Ss11.
f * Stmp2.
f;
1493 Stmp3.
f = Ss33.
f * Stmp2.
f;
1494 Stmp4.
f = Sc.
f * Sc.
f;
1495 Ss33.
f = Ss33.
f * Stmp4.
f;
1496 Ss11.
f = Ss11.
f * Stmp4.
f;
1501 Ss31.
f = Ss31.
f * Stmp4.
f;
1502 Stmp4.
f = Sc.
f * Ss.
f;
1503 Stmp2.
f = Stmp2.
f * Stmp4.
f;
1504 Stmp5.
f = Stmp5.
f * Stmp4.
f;
1509 #ifdef DEBUG_JACOBI_CONJUGATE 1510 printf(
"%.20g\n", Ss11.
f);
1511 printf(
"%.20g %.20g\n", Ss21.
f, Ss22.
f);
1512 printf(
"%.20g %.20g %.20g\n", Ss31.
f, Ss32.
f, Ss33.
f);
1519 Stmp1.
f = Ssh.
f * Sqvvx.
f;
1520 Stmp2.
f = Ssh.
f * Sqvvy.
f;
1521 Stmp3.
f = Ssh.
f * Sqvvz.
f;
1522 Ssh.
f = Ssh.
f * Sqvs.
f;
1524 Sqvs.
f = Sch.
f * Sqvs.
f;
1525 Sqvvx.
f = Sch.
f * Sqvvx.
f;
1526 Sqvvy.
f = Sch.
f * Sqvvy.
f;
1527 Sqvvz.
f = Sch.
f * Sqvvz.
f;
1540 Stmp2.
f = Sqvs.
f * Sqvs.
f;
1541 Stmp1.
f = Sqvvx.
f * Sqvvx.
f;
1543 Stmp1.
f = Sqvvy.
f * Sqvvy.
f;
1545 Stmp1.
f = Sqvvz.
f * Sqvvz.
f;
1549 Stmp4.
f = Stmp1.
f * 0.5f;
1550 Stmp3.
f = Stmp1.
f * Stmp4.
f;
1551 Stmp3.
f = Stmp1.
f * Stmp3.
f;
1552 Stmp3.
f = Stmp2.
f * Stmp3.
f;
1556 Sqvs.
f = Sqvs.
f * Stmp1.
f;
1557 Sqvvx.
f = Sqvvx.
f * Stmp1.
f;
1558 Sqvvy.
f = Sqvvy.
f * Stmp1.
f;
1559 Sqvvz.
f = Sqvvz.
f * Stmp1.
f;
1565 Stmp1.
f = Sqvvx.
f * Sqvvx.
f;
1566 Stmp2.
f = Sqvvy.
f * Sqvvy.
f;
1567 Stmp3.
f = Sqvvz.
f * Sqvvz.
f;
1568 Sv11.
f = Sqvs.
f * Sqvs.
f;
1580 Sv32.
f = Sqvs.
f * Stmp1.
f;
1581 Sv13.
f = Sqvs.
f * Stmp2.
f;
1582 Sv21.
f = Sqvs.
f * Stmp3.
f;
1583 Stmp1.
f = Sqvvy.
f * Stmp1.
f;
1584 Stmp2.
f = Sqvvz.
f * Stmp2.
f;
1585 Stmp3.
f = Sqvvx.
f * Stmp3.
f;
1599 Sa12.
f = Sv12.
f * Sa11.
f;
1600 Sa13.
f = Sv13.
f * Sa11.
f;
1601 Sa11.
f = Sv11.
f * Sa11.
f;
1602 Stmp1.
f = Sv21.
f * Stmp2.
f;
1604 Stmp1.
f = Sv31.
f * Stmp3.
f;
1606 Stmp1.
f = Sv22.
f * Stmp2.
f;
1608 Stmp1.
f = Sv32.
f * Stmp3.
f;
1610 Stmp1.
f = Sv23.
f * Stmp2.
f;
1612 Stmp1.
f = Sv33.
f * Stmp3.
f;
1617 Sa22.
f = Sv12.
f * Sa21.
f;
1618 Sa23.
f = Sv13.
f * Sa21.
f;
1619 Sa21.
f = Sv11.
f * Sa21.
f;
1620 Stmp1.
f = Sv21.
f * Stmp2.
f;
1622 Stmp1.
f = Sv31.
f * Stmp3.
f;
1624 Stmp1.
f = Sv22.
f * Stmp2.
f;
1626 Stmp1.
f = Sv32.
f * Stmp3.
f;
1628 Stmp1.
f = Sv23.
f * Stmp2.
f;
1630 Stmp1.
f = Sv33.
f * Stmp3.
f;
1635 Sa32.
f = Sv12.
f * Sa31.
f;
1636 Sa33.
f = Sv13.
f * Sa31.
f;
1637 Sa31.
f = Sv11.
f * Sa31.
f;
1638 Stmp1.
f = Sv21.
f * Stmp2.
f;
1640 Stmp1.
f = Sv31.
f * Stmp3.
f;
1642 Stmp1.
f = Sv22.
f * Stmp2.
f;
1644 Stmp1.
f = Sv32.
f * Stmp3.
f;
1646 Stmp1.
f = Sv23.
f * Stmp2.
f;
1648 Stmp1.
f = Sv33.
f * Stmp3.
f;
1655 Stmp1.
f = Sa11.
f * Sa11.
f;
1656 Stmp4.
f = Sa21.
f * Sa21.
f;
1658 Stmp4.
f = Sa31.
f * Sa31.
f;
1661 Stmp2.
f = Sa12.
f * Sa12.
f;
1662 Stmp4.
f = Sa22.
f * Sa22.
f;
1664 Stmp4.
f = Sa32.
f * Sa32.
f;
1667 Stmp3.
f = Sa13.
f * Sa13.
f;
1668 Stmp4.
f = Sa23.
f * Sa23.
f;
1670 Stmp4.
f = Sa33.
f * Sa33.
f;
1675 Stmp4.
ui = (Stmp1.
f < Stmp2.
f) ? 0xffffffff : 0;
1676 Stmp5.
ui = Sa11.
ui ^ Sa12.
ui;
1677 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1678 Sa11.
ui = Sa11.
ui ^ Stmp5.
ui;
1679 Sa12.
ui = Sa12.
ui ^ Stmp5.
ui;
1681 Stmp5.
ui = Sa21.
ui ^ Sa22.
ui;
1682 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1683 Sa21.
ui = Sa21.
ui ^ Stmp5.
ui;
1684 Sa22.
ui = Sa22.
ui ^ Stmp5.
ui;
1686 Stmp5.
ui = Sa31.
ui ^ Sa32.
ui;
1687 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1688 Sa31.
ui = Sa31.
ui ^ Stmp5.
ui;
1689 Sa32.
ui = Sa32.
ui ^ Stmp5.
ui;
1691 Stmp5.
ui = Sv11.
ui ^ Sv12.
ui;
1692 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1693 Sv11.
ui = Sv11.
ui ^ Stmp5.
ui;
1694 Sv12.
ui = Sv12.
ui ^ Stmp5.
ui;
1696 Stmp5.
ui = Sv21.
ui ^ Sv22.
ui;
1697 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1698 Sv21.
ui = Sv21.
ui ^ Stmp5.
ui;
1699 Sv22.
ui = Sv22.
ui ^ Stmp5.
ui;
1701 Stmp5.
ui = Sv31.
ui ^ Sv32.
ui;
1702 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1703 Sv31.
ui = Sv31.
ui ^ Stmp5.
ui;
1704 Sv32.
ui = Sv32.
ui ^ Stmp5.
ui;
1706 Stmp5.
ui = Stmp1.
ui ^ Stmp2.
ui;
1707 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1708 Stmp1.
ui = Stmp1.
ui ^ Stmp5.
ui;
1709 Stmp2.
ui = Stmp2.
ui ^ Stmp5.
ui;
1715 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1719 Sa12.
f = Sa12.
f * Stmp4.
f;
1720 Sa22.
f = Sa22.
f * Stmp4.
f;
1721 Sa32.
f = Sa32.
f * Stmp4.
f;
1723 Sv12.
f = Sv12.
f * Stmp4.
f;
1724 Sv22.
f = Sv22.
f * Stmp4.
f;
1725 Sv32.
f = Sv32.
f * Stmp4.
f;
1729 Stmp4.
ui = (Stmp1.
f < Stmp3.
f) ? 0xffffffff : 0;
1730 Stmp5.
ui = Sa11.
ui ^ Sa13.
ui;
1731 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1732 Sa11.
ui = Sa11.
ui ^ Stmp5.
ui;
1733 Sa13.
ui = Sa13.
ui ^ Stmp5.
ui;
1735 Stmp5.
ui = Sa21.
ui ^ Sa23.
ui;
1736 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1737 Sa21.
ui = Sa21.
ui ^ Stmp5.
ui;
1738 Sa23.
ui = Sa23.
ui ^ Stmp5.
ui;
1740 Stmp5.
ui = Sa31.
ui ^ Sa33.
ui;
1741 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1742 Sa31.
ui = Sa31.
ui ^ Stmp5.
ui;
1743 Sa33.
ui = Sa33.
ui ^ Stmp5.
ui;
1745 Stmp5.
ui = Sv11.
ui ^ Sv13.
ui;
1746 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1747 Sv11.
ui = Sv11.
ui ^ Stmp5.
ui;
1748 Sv13.
ui = Sv13.
ui ^ Stmp5.
ui;
1750 Stmp5.
ui = Sv21.
ui ^ Sv23.
ui;
1751 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1752 Sv21.
ui = Sv21.
ui ^ Stmp5.
ui;
1753 Sv23.
ui = Sv23.
ui ^ Stmp5.
ui;
1755 Stmp5.
ui = Sv31.
ui ^ Sv33.
ui;
1756 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1757 Sv31.
ui = Sv31.
ui ^ Stmp5.
ui;
1758 Sv33.
ui = Sv33.
ui ^ Stmp5.
ui;
1760 Stmp5.
ui = Stmp1.
ui ^ Stmp3.
ui;
1761 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1762 Stmp1.
ui = Stmp1.
ui ^ Stmp5.
ui;
1763 Stmp3.
ui = Stmp3.
ui ^ Stmp5.
ui;
1769 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1773 Sa11.
f = Sa11.
f * Stmp4.
f;
1774 Sa21.
f = Sa21.
f * Stmp4.
f;
1775 Sa31.
f = Sa31.
f * Stmp4.
f;
1777 Sv11.
f = Sv11.
f * Stmp4.
f;
1778 Sv21.
f = Sv21.
f * Stmp4.
f;
1779 Sv31.
f = Sv31.
f * Stmp4.
f;
1783 Stmp4.
ui = (Stmp2.
f < Stmp3.
f) ? 0xffffffff : 0;
1784 Stmp5.
ui = Sa12.
ui ^ Sa13.
ui;
1785 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1786 Sa12.
ui = Sa12.
ui ^ Stmp5.
ui;
1787 Sa13.
ui = Sa13.
ui ^ Stmp5.
ui;
1789 Stmp5.
ui = Sa22.
ui ^ Sa23.
ui;
1790 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1791 Sa22.
ui = Sa22.
ui ^ Stmp5.
ui;
1792 Sa23.
ui = Sa23.
ui ^ Stmp5.
ui;
1794 Stmp5.
ui = Sa32.
ui ^ Sa33.
ui;
1795 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1796 Sa32.
ui = Sa32.
ui ^ Stmp5.
ui;
1797 Sa33.
ui = Sa33.
ui ^ Stmp5.
ui;
1799 Stmp5.
ui = Sv12.
ui ^ Sv13.
ui;
1800 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1801 Sv12.
ui = Sv12.
ui ^ Stmp5.
ui;
1802 Sv13.
ui = Sv13.
ui ^ Stmp5.
ui;
1804 Stmp5.
ui = Sv22.
ui ^ Sv23.
ui;
1805 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1806 Sv22.
ui = Sv22.
ui ^ Stmp5.
ui;
1807 Sv23.
ui = Sv23.
ui ^ Stmp5.
ui;
1809 Stmp5.
ui = Sv32.
ui ^ Sv33.
ui;
1810 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1811 Sv32.
ui = Sv32.
ui ^ Stmp5.
ui;
1812 Sv33.
ui = Sv33.
ui ^ Stmp5.
ui;
1814 Stmp5.
ui = Stmp2.
ui ^ Stmp3.
ui;
1815 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1816 Stmp2.
ui = Stmp2.
ui ^ Stmp5.
ui;
1817 Stmp3.
ui = Stmp3.
ui ^ Stmp5.
ui;
1823 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1827 Sa13.
f = Sa13.
f * Stmp4.
f;
1828 Sa23.
f = Sa23.
f * Stmp4.
f;
1829 Sa33.
f = Sa33.
f * Stmp4.
f;
1831 Sv13.
f = Sv13.
f * Stmp4.
f;
1832 Sv23.
f = Sv23.
f * Stmp4.
f;
1833 Sv33.
f = Sv33.
f * Stmp4.
f;
1849 Ssh.
f = Sa21.
f * Sa21.
f;
1850 Ssh.
ui = (Ssh.
f >= gsmall_number) ? 0xffffffff : 0;
1851 Ssh.
ui = Ssh.
ui & Sa21.
ui;
1855 Sch.
f = max(Sch.
f, Sa11.
f);
1856 Sch.
f = max(Sch.
f, gsmall_number);
1857 Stmp5.
ui = (Sa11.
f >= Stmp5.
f) ? 0xffffffff : 0;
1859 Stmp1.
f = Sch.
f * Sch.
f;
1860 Stmp2.
f = Ssh.
f * Ssh.
f;
1864 Stmp4.
f = Stmp1.
f * 0.5f;
1865 Stmp3.
f = Stmp1.
f * Stmp4.
f;
1866 Stmp3.
f = Stmp1.
f * Stmp3.
f;
1867 Stmp3.
f = Stmp2.
f * Stmp3.
f;
1870 Stmp1.
f = Stmp1.
f * Stmp2.
f;
1874 Stmp1.
ui = ~Stmp5.
ui & Ssh.
ui;
1875 Stmp2.
ui = ~Stmp5.
ui & Sch.
ui;
1876 Sch.
ui = Stmp5.
ui & Sch.
ui;
1877 Ssh.
ui = Stmp5.
ui & Ssh.
ui;
1878 Sch.
ui = Sch.
ui | Stmp1.
ui;
1879 Ssh.
ui = Ssh.
ui | Stmp2.
ui;
1881 Stmp1.
f = Sch.
f * Sch.
f;
1882 Stmp2.
f = Ssh.
f * Ssh.
f;
1886 Stmp4.
f = Stmp1.
f * 0.5f;
1887 Stmp3.
f = Stmp1.
f * Stmp4.
f;
1888 Stmp3.
f = Stmp1.
f * Stmp3.
f;
1889 Stmp3.
f = Stmp2.
f * Stmp3.
f;
1893 Sch.
f = Sch.
f * Stmp1.
f;
1894 Ssh.
f = Ssh.
f * Stmp1.
f;
1896 Sc.
f = Sch.
f * Sch.
f;
1897 Ss.
f = Ssh.
f * Ssh.
f;
1899 Ss.
f = Ssh.
f * Sch.
f;
1906 Stmp1.
f = Ss.
f * Sa11.
f;
1907 Stmp2.
f = Ss.
f * Sa21.
f;
1908 Sa11.
f = Sc.
f * Sa11.
f;
1909 Sa21.
f = Sc.
f * Sa21.
f;
1913 Stmp1.
f = Ss.
f * Sa12.
f;
1914 Stmp2.
f = Ss.
f * Sa22.
f;
1915 Sa12.
f = Sc.
f * Sa12.
f;
1916 Sa22.
f = Sc.
f * Sa22.
f;
1920 Stmp1.
f = Ss.
f * Sa13.
f;
1921 Stmp2.
f = Ss.
f * Sa23.
f;
1922 Sa13.
f = Sc.
f * Sa13.
f;
1923 Sa23.
f = Sc.
f * Sa23.
f;
1931 Stmp1.
f = Ss.
f * Su11.
f;
1932 Stmp2.
f = Ss.
f * Su12.
f;
1933 Su11.
f = Sc.
f * Su11.
f;
1934 Su12.
f = Sc.
f * Su12.
f;
1938 Stmp1.
f = Ss.
f * Su21.
f;
1939 Stmp2.
f = Ss.
f * Su22.
f;
1940 Su21.
f = Sc.
f * Su21.
f;
1941 Su22.
f = Sc.
f * Su22.
f;
1945 Stmp1.
f = Ss.
f * Su31.
f;
1946 Stmp2.
f = Ss.
f * Su32.
f;
1947 Su31.
f = Sc.
f * Su31.
f;
1948 Su32.
f = Sc.
f * Su32.
f;
1954 Ssh.
f = Sa31.
f * Sa31.
f;
1955 Ssh.
ui = (Ssh.
f >= gsmall_number) ? 0xffffffff : 0;
1956 Ssh.
ui = Ssh.
ui & Sa31.
ui;
1960 Sch.
f = max(Sch.
f, Sa11.
f);
1961 Sch.
f = max(Sch.
f, gsmall_number);
1962 Stmp5.
ui = (Sa11.
f >= Stmp5.
f) ? 0xffffffff : 0;
1964 Stmp1.
f = Sch.
f * Sch.
f;
1965 Stmp2.
f = Ssh.
f * Ssh.
f;
1969 Stmp4.
f = Stmp1.
f * 0.5;
1970 Stmp3.
f = Stmp1.
f * Stmp4.
f;
1971 Stmp3.
f = Stmp1.
f * Stmp3.
f;
1972 Stmp3.
f = Stmp2.
f * Stmp3.
f;
1975 Stmp1.
f = Stmp1.
f * Stmp2.
f;
1979 Stmp1.
ui = ~Stmp5.
ui & Ssh.
ui;
1980 Stmp2.
ui = ~Stmp5.
ui & Sch.
ui;
1981 Sch.
ui = Stmp5.
ui & Sch.
ui;
1982 Ssh.
ui = Stmp5.
ui & Ssh.
ui;
1983 Sch.
ui = Sch.
ui | Stmp1.
ui;
1984 Ssh.
ui = Ssh.
ui | Stmp2.
ui;
1986 Stmp1.
f = Sch.
f * Sch.
f;
1987 Stmp2.
f = Ssh.
f * Ssh.
f;
1991 Stmp4.
f = Stmp1.
f * 0.5f;
1992 Stmp3.
f = Stmp1.
f * Stmp4.
f;
1993 Stmp3.
f = Stmp1.
f * Stmp3.
f;
1994 Stmp3.
f = Stmp2.
f * Stmp3.
f;
1998 Sch.
f = Sch.
f * Stmp1.
f;
1999 Ssh.
f = Ssh.
f * Stmp1.
f;
2001 Sc.
f = Sch.
f * Sch.
f;
2002 Ss.
f = Ssh.
f * Ssh.
f;
2004 Ss.
f = Ssh.
f * Sch.
f;
2011 Stmp1.
f = Ss.
f * Sa11.
f;
2012 Stmp2.
f = Ss.
f * Sa31.
f;
2013 Sa11.
f = Sc.
f * Sa11.
f;
2014 Sa31.
f = Sc.
f * Sa31.
f;
2018 Stmp1.
f = Ss.
f * Sa12.
f;
2019 Stmp2.
f = Ss.
f * Sa32.
f;
2020 Sa12.
f = Sc.
f * Sa12.
f;
2021 Sa32.
f = Sc.
f * Sa32.
f;
2025 Stmp1.
f = Ss.
f * Sa13.
f;
2026 Stmp2.
f = Ss.
f * Sa33.
f;
2027 Sa13.
f = Sc.
f * Sa13.
f;
2028 Sa33.
f = Sc.
f * Sa33.
f;
2036 Stmp1.
f = Ss.
f * Su11.
f;
2037 Stmp2.
f = Ss.
f * Su13.
f;
2038 Su11.
f = Sc.
f * Su11.
f;
2039 Su13.
f = Sc.
f * Su13.
f;
2043 Stmp1.
f = Ss.
f * Su21.
f;
2044 Stmp2.
f = Ss.
f * Su23.
f;
2045 Su21.
f = Sc.
f * Su21.
f;
2046 Su23.
f = Sc.
f * Su23.
f;
2050 Stmp1.
f = Ss.
f * Su31.
f;
2051 Stmp2.
f = Ss.
f * Su33.
f;
2052 Su31.
f = Sc.
f * Su31.
f;
2053 Su33.
f = Sc.
f * Su33.
f;
2059 Ssh.
f = Sa32.
f * Sa32.
f;
2060 Ssh.
ui = (Ssh.
f >= gsmall_number) ? 0xffffffff : 0;
2061 Ssh.
ui = Ssh.
ui & Sa32.
ui;
2065 Sch.
f = max(Sch.
f, Sa22.
f);
2066 Sch.
f = max(Sch.
f, gsmall_number);
2067 Stmp5.
ui = (Sa22.
f >= Stmp5.
f) ? 0xffffffff : 0;
2069 Stmp1.
f = Sch.
f * Sch.
f;
2070 Stmp2.
f = Ssh.
f * Ssh.
f;
2074 Stmp4.
f = Stmp1.
f * 0.5f;
2075 Stmp3.
f = Stmp1.
f * Stmp4.
f;
2076 Stmp3.
f = Stmp1.
f * Stmp3.
f;
2077 Stmp3.
f = Stmp2.
f * Stmp3.
f;
2080 Stmp1.
f = Stmp1.
f * Stmp2.
f;
2084 Stmp1.
ui = ~Stmp5.
ui & Ssh.
ui;
2085 Stmp2.
ui = ~Stmp5.
ui & Sch.
ui;
2086 Sch.
ui = Stmp5.
ui & Sch.
ui;
2087 Ssh.
ui = Stmp5.
ui & Ssh.
ui;
2088 Sch.
ui = Sch.
ui | Stmp1.
ui;
2089 Ssh.
ui = Ssh.
ui | Stmp2.
ui;
2091 Stmp1.
f = Sch.
f * Sch.
f;
2092 Stmp2.
f = Ssh.
f * Ssh.
f;
2096 Stmp4.
f = Stmp1.
f * 0.5f;
2097 Stmp3.
f = Stmp1.
f * Stmp4.
f;
2098 Stmp3.
f = Stmp1.
f * Stmp3.
f;
2099 Stmp3.
f = Stmp2.
f * Stmp3.
f;
2103 Sch.
f = Sch.
f * Stmp1.
f;
2104 Ssh.
f = Ssh.
f * Stmp1.
f;
2106 Sc.
f = Sch.
f * Sch.
f;
2107 Ss.
f = Ssh.
f * Ssh.
f;
2109 Ss.
f = Ssh.
f * Sch.
f;
2116 Stmp1.
f = Ss.
f * Sa21.
f;
2117 Stmp2.
f = Ss.
f * Sa31.
f;
2118 Sa21.
f = Sc.
f * Sa21.
f;
2119 Sa31.
f = Sc.
f * Sa31.
f;
2123 Stmp1.
f = Ss.
f * Sa22.
f;
2124 Stmp2.
f = Ss.
f * Sa32.
f;
2125 Sa22.
f = Sc.
f * Sa22.
f;
2126 Sa32.
f = Sc.
f * Sa32.
f;
2130 Stmp1.
f = Ss.
f * Sa23.
f;
2131 Stmp2.
f = Ss.
f * Sa33.
f;
2132 Sa23.
f = Sc.
f * Sa23.
f;
2133 Sa33.
f = Sc.
f * Sa33.
f;
2141 Stmp1.
f = Ss.
f * Su12.
f;
2142 Stmp2.
f = Ss.
f * Su13.
f;
2143 Su12.
f = Sc.
f * Su12.
f;
2144 Su13.
f = Sc.
f * Su13.
f;
2148 Stmp1.
f = Ss.
f * Su22.
f;
2149 Stmp2.
f = Ss.
f * Su23.
f;
2150 Su22.
f = Sc.
f * Su22.
f;
2151 Su23.
f = Sc.
f * Su23.
f;
2155 Stmp1.
f = Ss.
f * Su32.
f;
2156 Stmp2.
f = Ss.
f * Su33.
f;
2157 Su32.
f = Sc.
f * Su32.
f;
2158 Su33.
f = Sc.
f * Su33.
f;
// Fragment of a templated 3x3 solve routine. This listing elides several
// lines (the function-name line, the remaining parameter, and the statements
// that fill U and S), so only the visible steps are documented here.
// Visible steps:
//   1. replace each S[i] by its reciprocal, or by 0 when |S[i]| < epsilon;
//   2. build S_UT with S_UT[3*i + j] = U[3*j + i] * S[i];
//   3. declare a zero-initialised 9-element buffer Ainv.
// NOTE(review): presumably U and S hold the SVD factors of A_3x3 computed on
// the elided lines, and Ainv receives the pseudo-inverse -- confirm against
// the full source.
2189 template <
typename scalar_t>
2191 const scalar_t *A_3x3,
2192 const scalar_t *B_3x1,
// Cutoff: entries of S whose magnitude is below this are zeroed instead of
// inverted, which avoids dividing by a (near-)zero value.
2203 const scalar_t epsilon = 1e-10;
// In-place thresholded reciprocal of the three entries of S.
// NOTE(review): `abs` is unqualified, so the overload chosen depends on the
// headers in scope -- confirm a floating-point overload is selected on every
// compilation path. `1.0` is a double literal, so the division is carried out
// in double when scalar_t is float.
2204 S[0] = abs(S[0]) < epsilon ? 0 : 1.0 / S[0];
2205 S[1] = abs(S[1]) < epsilon ? 0 : 1.0 / S[1];
2206 S[2] = abs(S[2]) < epsilon ? 0 : 1.0 / S[2];
// Scale-and-transpose: block i of S_UT (indices 3*i .. 3*i+2) takes the
// elements U[i], U[i+3], U[i+6], each multiplied by the inverted S[i].
// Assuming row-major 3x3 storage, this is diag(S) * U^T -- TODO confirm layout.
2213 S_UT[0] = U[0] * S[0];
2214 S_UT[1] = U[3] * S[0];
2215 S_UT[2] = U[6] * S[0];
2216 S_UT[3] = U[1] * S[1];
2217 S_UT[4] = U[4] * S[1];
2218 S_UT[5] = U[7] * S[1];
2219 S_UT[6] = U[2] * S[2];
2220 S_UT[7] = U[5] * S[2];
2221 S_UT[8] = U[8] * S[2];
// Zero-initialised 3x3 scratch buffer; how it is filled is not visible in this
// fragment.
2226 scalar_t Ainv[9] = {0};
OPEN3D_DEVICE OPEN3D_FORCE_INLINE void solve_svd3x3(const scalar_t *A_3x3, const scalar_t *B_3x1, scalar_t *X_3x1)
Definition: SVD3x3.h:2190
#define __dsub_rn(x, y)
Definition: SVD3x3.h:86
OPEN3D_DEVICE OPEN3D_FORCE_INLINE void matmul3x3_3x3(const scalar_t *A_3x3, const scalar_t *B_3x3, scalar_t *C_3x3)
Definition: Matrix.h:67
OPEN3D_DEVICE OPEN3D_FORCE_INLINE void svd3x3< double >(const double *A_3x3, double *U_3x3, double *S_3x1, double *V_3x3)
Definition: SVD3x3.h:112
#define OPEN3D_FORCE_INLINE
Definition: CUDAUtils.h:62
#define __fsub_rn(x, y)
Definition: SVD3x3.h:82
#define __frsqrt_rn(x)
Definition: SVD3x3.h:83
#define gsine_pi_over_eight
Definition: SVD3x3.h:59
#define OPEN3D_DEVICE
Definition: CUDAUtils.h:64
#define __drsqrt_rn(x)
Definition: SVD3x3.h:87
scalar_t f
Definition: SVD3x3.h:101
OPEN3D_DEVICE OPEN3D_FORCE_INLINE void svd3x3< float >(const float *A_3x3, float *U_3x3, float *S_3x1, float *V_3x3)
Definition: SVD3x3.h:1151
#define gcosine_pi_over_eight
Definition: SVD3x3.h:61
#define gtiny_number
Definition: SVD3x3.h:62
unsigned int ui
Definition: SVD3x3.h:102
Definition: PinholeCameraIntrinsic.cpp:35
#define __dadd_rn(x, y)
Definition: SVD3x3.h:85
#define gfour_gamma_squared
Definition: SVD3x3.h:63
#define __fadd_rn(x, y)
Definition: SVD3x3.h:81
OPEN3D_DEVICE OPEN3D_FORCE_INLINE void matmul3x3_3x1(const scalar_t *A_3x3, const scalar_t *B_3x1, scalar_t *C_3x1)
Definition: Matrix.h:58
OPEN3D_DEVICE OPEN3D_FORCE_INLINE void svd3x3(const scalar_t *A_3x3, scalar_t *U_3x3, scalar_t *S_3x1, scalar_t *V_3x3)
#define gone
Definition: SVD3x3.h:58