// Header guard, includes, and the primary template OSVRR_sx_sx.
//
// The primary template takes five parameters (part, Lb, Ld, unit_a, vectorize)
// and only declares the static compute() entry point; no body is given here.
// The bodies visible in this file live in the specializations for part == 0
// and part == 1.
//
// compute() receives the evaluator state (inteval), one writable output buffer
// (target) and five read-only input buffers.
//
// NOTE(review): the bare integers (19, 20, 24, ..., 40) look like line numbers
// of a rendered source listing that were fused into the text; they are not
// valid C++ and the preprocessor directives are no longer on lines of their
// own. The closing "};" of the struct is not visible either. Restore from the
// pristine file before compiling.
19 #ifndef _libint2_src_lib_libint_osvrrsxsx_h_ 20 #define _libint2_src_lib_libint_osvrrsxsx_h_ 24 #include <util_types.h> 25 #include <libint2/cgshell_ordering.h> 28 #pragma implementation 33 template <
int part,
int Lb,
int Ld,
bool unit_a,
bool vectorize>
struct OSVRR_sx_sx {
34 static void compute(
const Libint_t* inteval,
35 LIBINT2_REALTYPE* target,
// NOTE(review): the first two inputs are named src1, src0 here, while both
// specializations below name them src0, src1. All five inputs have the same
// type, so the declarations still match, but the naming is misleading --
// confirm the intended order.
36 const LIBINT2_REALTYPE* src1,
37 const LIBINT2_REALTYPE* src0,
38 const LIBINT2_REALTYPE* src2,
39 const LIBINT2_REALTYPE* src3,
40 const LIBINT2_REALTYPE* src4);
// Partial specialization of OSVRR_sx_sx selected when part == 0.
//
// For each component visited by FOR_CART(bx, by, bz, Lb), compute() forms
//     WP * src1                         (always)
//   + PB * src0                         (only when unit_a is false)
//   + bxyz * oo2z * (src2 - roz * src3) (first inner loop only)
// and afterwards accumulates  dxyz * oo2ze * src4  into target for each
// component visited by FOR_CART(dx, dy, dz, Ld-1).
//
// NOTE(review): this text is missing statements the visible code relies on:
//   - "unit_a" is used in the specialization arguments but is not declared in
//     the visible template parameter list (listing line 51 is absent);
//   - "xyz", "dv" and the assignments to WP are never declared/made in view;
//   - "value" is computed but the store into target is not visible;
//   - the condition choosing between the two inner loops, the #endif lines
//     and all closing braces are absent.
// Restore these from the pristine file; do not infer them from this text.
50 template <
int Lb,
int Ld,
52 bool vectorize>
struct OSVRR_sx_sx<0,Lb,Ld,unit_a,vectorize> {
54 static void compute(
const Libint_t* inteval,
55 LIBINT2_REALTYPE* target,
56 const LIBINT2_REALTYPE* src0,
57 const LIBINT2_REALTYPE* src1,
58 const LIBINT2_REALTYPE* src2,
59 const LIBINT2_REALTYPE* src3,
60 const LIBINT2_REALTYPE* src4) {
// Precondition: Lb >= 2 and Ld >= 1.
63 assert(not (Lb < 2 || Ld < 1));
// Inner vector length: taken from the evaluator only when vectorize is set.
65 const unsigned int veclen = vectorize ? inteval->veclen : 1;
// Nd comes from INT_NCART(Ld) (macro defined elsewhere; presumably the number
// of Cartesian components of the Ld shell -- confirm). NdV is the stride of
// one b-component's block in units of LIBINT2_REALTYPE.
67 const unsigned int Nd = INT_NCART(Ld);
68 const unsigned int NdV = Nd * veclen;
71 FOR_CART(bx, by, bz, Lb)
73 int b[3]; b[0] = bx; b[1] = by; b[2] = bz;
75 enum XYZ {x=0, y=1, z=2};
83 const LIBINT2_REALTYPE *PB, *WP;
// PB is only assigned when unit_a is false; every later read of PB is guarded
// by the same condition.
86 #if LIBINT2_DEFINED(eri,PB_x) 87 if (not unit_a) PB = inteval->PB_x;
92 #if LIBINT2_DEFINED(eri,PB_y) 93 if (not unit_a) PB = inteval->PB_y;
98 #if LIBINT2_DEFINED(eri,PB_z) 99 if (not unit_a) PB = inteval->PB_z;
// Offsets of the (Lb-1)-shell block inside src0 / src1.
105 const unsigned int ibm1 = INT_CARTINDEX(Lb-1,b[0],b[1]);
106 const unsigned int bm10d0_offset = ibm1 * NdV;
// src0_ptr is null when unit_a is set; it is only dereferenced under
// "if (not unit_a)".
107 const LIBINT2_REALTYPE* src0_ptr = unit_a ? 0 : src0 + bm10d0_offset;
108 const LIBINT2_REALTYPE* src1_ptr = src1 + bm10d0_offset;
// Offsets of the (Lb-2)-shell block inside src2 / src3.
113 const unsigned int ibm2 = INT_CARTINDEX(Lb-2,b[0],b[1]);
114 const unsigned int bm20d0_offset = ibm2 * NdV;
116 const LIBINT2_REALTYPE* src2_ptr = src2 + bm20d0_offset;
117 const LIBINT2_REALTYPE* src3_ptr = src3 + bm20d0_offset;
118 const LIBINT2_REALTYPE bxyz = (LIBINT2_REALTYPE)b[xyz];
// First inner loop: includes the src2/src3 term. dv advances with v, so it is
// the flat index over (d, v).
121 for(
unsigned int d = 0; d < Nd; ++d) {
122 for(
unsigned int v=0; v<veclen; ++v, ++dv) {
123 LIBINT2_REALTYPE value = WP[v] * src1_ptr[dv] + bxyz * inteval->oo2z[v] * (src2_ptr[dv] - inteval->roz[v] * src3_ptr[dv]);
124 if (not unit_a) value += PB[v] * src0_ptr[dv];
// Optional operation counter; the PB term costs two extra operations.
128 #if LIBINT2_FLOP_COUNT 129 inteval->nflops[0] += (unit_a ? 6 : 8) * NdV;
// Second inner loop: same as above but without the src2/src3 term
// (presumably the alternative branch -- the if/else is not visible here).
135 for(
unsigned int d = 0; d < Nd; ++d) {
136 for(
unsigned int v=0; v<veclen; ++v, ++dv) {
137 LIBINT2_REALTYPE value = WP[v] * src1_ptr[dv];
138 if (not unit_a) value += PB[v] * src0_ptr[dv];
142 #if LIBINT2_FLOP_COUNT 143 inteval->nflops[0] += (unit_a ? 1 : 3) * NdV;
// Contribution from src4: block ibm1 of an array whose per-block stride is
// INT_NCART(Ld-1) * veclen.
148 const unsigned int Ndm1 = INT_NCART(Ld-1);
149 const unsigned int Ndm1V = Ndm1 * veclen;
150 const unsigned int bm10dm10_offset = ibm1 * Ndm1V;
151 const LIBINT2_REALTYPE* src4_ptr = src4 + bm10dm10_offset;
155 FOR_CART(dx, dy, dz, Ld-1)
157 int d[3]; d[0] = dx; d[1] = dy; d[2] = dz;
// Locate the target element inside the Ld shell and accumulate into it.
160 const unsigned int dc = INT_CARTINDEX(Ld,d[0],d[1]);
161 const unsigned int dc_offset = dc * veclen;
162 LIBINT2_REALTYPE* tptr = target + dc_offset;
163 const LIBINT2_REALTYPE dxyz = (LIBINT2_REALTYPE)d[xyz];
164 for(
unsigned int v=0; v<veclen; ++v) {
165 tptr[v] += dxyz * inteval->oo2ze[v] * src4_ptr[v];
167 #if LIBINT2_FLOP_COUNT 168 inteval->nflops[0] += 3 * veclen;
// Partial specialization of OSVRR_sx_sx selected when part == 1.
//
// For each component visited by FOR_CART(dx, dy, dz, Ld), compute() writes
//     QD * src0 + WQ * src1
//   + dxyz * oo2e * (src2 - roe * src3)   (first inner loop only)
// into target, and afterwards accumulates  bxyz * oo2ze * src4  into target
// for each component visited by FOR_CART(bx, by, bz, Lb-1).
//
// NOTE(review): this text is missing statements the visible code relies on:
// the declaration of "xyz" and "id", the assignments to QD and WQ, the
// condition choosing between the two inner loops, any pointer increments,
// the #endif lines and all closing braces. Restore from the pristine file.
191 template <
int Lb,
int Ld,
bool vectorize>
// NOTE(review): only four arguments are supplied here, but the primary
// template OSVRR_sx_sx is declared with five parameters
// (part, Lb, Ld, unit_a, vectorize). An argument for unit_a appears to be
// missing; as written this specialization would not match -- confirm.
struct OSVRR_sx_sx<1,Lb,Ld,vectorize> {
193 static void compute(
const Libint_t* inteval,
194 LIBINT2_REALTYPE* target,
195 const LIBINT2_REALTYPE* src0,
196 const LIBINT2_REALTYPE* src1,
197 const LIBINT2_REALTYPE* src2,
198 const LIBINT2_REALTYPE* src3,
199 const LIBINT2_REALTYPE* src4) {
// NOTE(review): the part == 0 specialization checks its precondition with
// assert(not (...)). Here a bare "if" with no visible body is used, which
// would make the next statement its body. Looks unintended -- confirm.
202 if (Lb < 1 || Ld < 2)
// Inner vector length: taken from the evaluator only when vectorize is set.
218 const unsigned int veclen = vectorize ? inteval->veclen : 1;
// Sizes from INT_NCART (macro defined elsewhere; presumably Cartesian
// component counts -- confirm), each also scaled by veclen.
220 const unsigned int Nb = INT_NCART(Lb);
221 const unsigned int Nd = INT_NCART(Ld);
222 const unsigned int Ndv = Nd * veclen;
223 const unsigned int Ndm1 = INT_NCART(Ld-1);
224 const unsigned int Ndm1v = Ndm1 * veclen;
225 const unsigned int Ndm2 = INT_NCART(Ld-2);
226 const unsigned int Ndm2v = Ndm2 * veclen;
230 FOR_CART(dx, dy, dz, Ld)
232 int d[3]; d[0] = dx; d[1] = dy; d[2] = dz;
234 enum XYZ {x=0, y=1, z=2};
// Direction choice: y when dy != 0, then overridden by x when dx != 0.
// The declaration/default of xyz is not visible here.
237 if (dy != 0) xyz = y;
238 if (dx != 0) xyz = x;
242 const LIBINT2_REALTYPE *QD, *WQ;
258 const unsigned int idm1 = INT_CARTINDEX(Ld-1,d[0],d[1]);
// NOTE(review): "id" is not declared in the visible text.
259 const unsigned int d0_offset =
id * veclen;
260 const unsigned int dm10_offset = idm1 * veclen;
261 LIBINT2_REALTYPE* target_ptr = target + d0_offset;
262 const LIBINT2_REALTYPE* src0_ptr = src0 + dm10_offset;
263 const LIBINT2_REALTYPE* src1_ptr = src1 + dm10_offset;
// Offsets of the (Ld-2)-shell element inside src2 / src3.
268 const unsigned int idm2 = INT_CARTINDEX(Ld-2,d[0],d[1]);
269 const unsigned int dm20_offset = idm2 * veclen;
271 const LIBINT2_REALTYPE* src2_ptr = src2 + dm20_offset;
272 const LIBINT2_REALTYPE* src3_ptr = src3 + dm20_offset;
273 const LIBINT2_REALTYPE dxyz = (LIBINT2_REALTYPE)d[xyz];
// First inner loop: includes the src2/src3 term.
// NOTE(review): the loop over b does not index with b in the visible lines;
// presumably the pointers are advanced per iteration in lines that are not
// visible here -- confirm.
275 for(
unsigned int b = 0; b < Nb; ++b) {
276 for(
unsigned int v=0; v<veclen; ++v) {
277 target_ptr[v] = QD[v] * src0_ptr[v] + WQ[v] * src1_ptr[v]
278 + dxyz * inteval->oo2e[v] * (src2_ptr[v] - inteval->roe[v] * src3_ptr[v]);
286 #if LIBINT2_FLOP_COUNT 287 inteval->nflops[0] += 8 * Nb * veclen;
// Second inner loop: same as above but without the src2/src3 term
// (presumably the alternative branch -- the if/else is not visible here).
292 for(
unsigned int b = 0; b < Nb; ++b) {
293 for(
unsigned int v=0; v<veclen; ++v) {
294 target_ptr[v] = QD[v] * src0_ptr[v] + WQ[v] * src1_ptr[v];
300 #if LIBINT2_FLOP_COUNT 301 inteval->nflops[0] += 3 * Nb * veclen;
// Contribution from src4, starting at the (Ld-1)-shell element offset.
306 const LIBINT2_REALTYPE* src4_ptr = src4 + dm10_offset;
310 FOR_CART(bx, by, bz, Lb-1)
312 int b[3]; b[0] = bx; b[1] = by; b[2] = bz;
// Target element: row ib of the Lb shell (stride Ndv) plus the d offset.
315 const unsigned int ib = INT_CARTINDEX(Lb,b[0],b[1]);
316 const unsigned int b0d0_offset = ib * Ndv + d0_offset;
// Re-declares target_ptr; presumably inside a nested scope whose braces are
// not visible here.
317 LIBINT2_REALTYPE* target_ptr = target + b0d0_offset;
318 const LIBINT2_REALTYPE bxyz = (LIBINT2_REALTYPE)b[xyz];
319 for(
unsigned int v=0; v<veclen; ++v) {
320 target_ptr[v] += bxyz * inteval->oo2ze[v] * src4_ptr[v];
322 #if LIBINT2_FLOP_COUNT 323 inteval->nflops[0] += 3 * veclen;
// Primary template OSAVRR_sx_sx with four parameters (part, Lb, Ld,
// vectorize). It only declares the static compute() entry point, which takes
// the evaluator state, one writable output buffer and two read-only inputs
// (src1 and src4); no body is given here.
// NOTE(review): the closing "};" of the struct is not visible in this text.
339 template <
int part,
int Lb,
int Ld,
bool vectorize>
struct OSAVRR_sx_sx {
340 static void compute(
const Libint_t* inteval,
341 LIBINT2_REALTYPE* target,
342 const LIBINT2_REALTYPE* src1,
343 const LIBINT2_REALTYPE* src4);
// Definition of a compute() taking (inteval, target, src1, src4).
//
// For each component visited by FOR_CART(bx, by, bz, Lb) it writes
// WP * src1 into target, then accumulates dxyz * oo2ze * src4 into target for
// each component visited by FOR_CART(dx, dy, dz, Ld-1).
//
// NOTE(review): the template parameter list is cut off after "int Ld," and
// the struct header is not visible (listing lines 351-352 are absent), so the
// specialization this body belongs to cannot be read from this text --
// presumably a specialization of OSAVRR_sx_sx; confirm. The declarations of
// "xyz" and "dv", the assignments to WP, the #endif lines and all closing
// braces are also absent.
350 template <
int Lb,
int Ld,
353 static void compute(
const Libint_t* inteval,
354 LIBINT2_REALTYPE* target,
355 const LIBINT2_REALTYPE* src1,
356 const LIBINT2_REALTYPE* src4) {
// Precondition: Lb >= 2 and Ld >= 1.
359 assert(not (Lb < 2 || Ld < 1));
// Inner vector length: taken from the evaluator only when vectorize is set.
361 const unsigned int veclen = vectorize ? inteval->veclen : 1;
363 const unsigned int Nd = INT_NCART(Ld);
364 const unsigned int NdV = Nd * veclen;
367 FOR_CART(bx, by, bz, Lb)
369 int b[3]; b[0] = bx; b[1] = by; b[2] = bz;
371 enum XYZ {x=0, y=1, z=2};
// Direction choice: y when by != 0, then overridden by x when bx != 0.
// The declaration/default of xyz is not visible here.
374 if (by != 0) xyz = y;
375 if (bx != 0) xyz = x;
379 const LIBINT2_REALTYPE *WP;
// Offset of the (Lb-1)-shell block inside src1.
392 const unsigned int ibm1 = INT_CARTINDEX(Lb-1,b[0],b[1]);
393 const unsigned int bm10d0_offset = ibm1 * NdV;
394 const LIBINT2_REALTYPE* src1_ptr = src1 + bm10d0_offset;
// dv advances with v, so it is the flat index over (d, v).
398 for(
unsigned int d = 0; d < Nd; ++d) {
399 for(
unsigned int v=0; v<veclen; ++v, ++dv) {
400 target[dv] = WP[v] * src1_ptr[dv];
// Optional operation counter: one multiply per written element.
403 #if LIBINT2_FLOP_COUNT 404 inteval->nflops[0] += NdV;
// Contribution from src4: block ibm1 of an array whose per-block stride is
// INT_NCART(Ld-1) * veclen.
409 const unsigned int Ndm1 = INT_NCART(Ld-1);
410 const unsigned int Ndm1V = Ndm1 * veclen;
411 const unsigned int bm10dm10_offset = ibm1 * Ndm1V;
412 const LIBINT2_REALTYPE* src4_ptr = src4 + bm10dm10_offset;
416 FOR_CART(dx, dy, dz, Ld-1)
418 int d[3]; d[0] = dx; d[1] = dy; d[2] = dz;
// Locate the target element inside the Ld shell and accumulate into it.
421 const unsigned int dc = INT_CARTINDEX(Ld,d[0],d[1]);
422 const unsigned int dc_offset = dc * veclen;
423 LIBINT2_REALTYPE* tptr = target + dc_offset;
424 const LIBINT2_REALTYPE dxyz = (LIBINT2_REALTYPE)d[xyz];
425 for(
unsigned int v=0; v<veclen; ++v) {
426 tptr[v] += dxyz * inteval->oo2ze[v] * src4_ptr[v];
428 #if LIBINT2_FLOP_COUNT 429 inteval->nflops[0] += 3 * veclen;
447 #endif // header guard Defaults definitions for various parameters assumed by Libint.
// Cross-reference residue from the rendered source listing (not code):
//   Definition: algebra.cc:23
//   Definition: OSVRR_sx_sx.h:339
//   Definition: test_eri_rys.cc:46
//   Definition: OSVRR_sx_sx.h:33