LIBINT  2.1.0-stable
OSVRR_sx_sx.h
1 /*
2  * This file is a part of Libint.
3  * Copyright (C) 2004-2014 Edward F. Valeev
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Library General Public License, version 2,
7  * as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU Library General Public License
15  * along with this program. If not, see http://www.gnu.org/licenses/.
16  *
17  */
18 
19 #ifndef _libint2_src_lib_libint_osvrrsxsx_h_
20 #define _libint2_src_lib_libint_osvrrsxsx_h_
21 
22 #include <cstdlib>
23 #include <libint2.h>
24 #include <util_types.h>
25 #include <libint2/cgshell_ordering.h>
26 
27 #ifdef __GNUC__
28 #pragma implementation
29 #endif
30 
31 namespace libint2 {
32 
33  template <int part, int Lb, int Ld, bool unit_a, bool vectorize> struct OSVRR_sx_sx {
34  static void compute(const Libint_t* inteval,
35  LIBINT2_REALTYPE* target,
36  const LIBINT2_REALTYPE* src1,
37  const LIBINT2_REALTYPE* src0,
38  const LIBINT2_REALTYPE* src2,
39  const LIBINT2_REALTYPE* src3,
40  const LIBINT2_REALTYPE* src4);
41  };
42 
50  template <int Lb, int Ld,
51  bool unit_a,
52  bool vectorize> struct OSVRR_sx_sx<0,Lb,Ld,unit_a,vectorize> {
53 
54  static void compute(const Libint_t* inteval,
55  LIBINT2_REALTYPE* target,
56  const LIBINT2_REALTYPE* src0,
57  const LIBINT2_REALTYPE* src1,
58  const LIBINT2_REALTYPE* src2,
59  const LIBINT2_REALTYPE* src3,
60  const LIBINT2_REALTYPE* src4) {
61 
62  // works for (sd|sp) and higher
63  assert(not (Lb < 2 || Ld < 1));
64 
65  const unsigned int veclen = vectorize ? inteval->veclen : 1;
66 
67  const unsigned int Nd = INT_NCART(Ld);
68  const unsigned int NdV = Nd * veclen;
69 
70  int bx, by, bz;
71  FOR_CART(bx, by, bz, Lb)
72 
73  int b[3]; b[0] = bx; b[1] = by; b[2] = bz;
74 
75  enum XYZ {x=0, y=1, z=2};
76  // Build along x, if possible
77  XYZ xyz = z;
78  if (by != 0) xyz = y;
79  if (bx != 0) xyz = x;
80  --b[xyz];
81 
82  // redirect
83  const LIBINT2_REALTYPE *PB, *WP;
84  switch(xyz) {
85  case x:
86 #if LIBINT2_DEFINED(eri,PB_x)
87  if (not unit_a) PB = inteval->PB_x;
88 #endif
89  WP = inteval->WP_x;
90  break;
91  case y:
92 #if LIBINT2_DEFINED(eri,PB_y)
93  if (not unit_a) PB = inteval->PB_y;
94 #endif
95  WP = inteval->WP_y;
96  break;
97  case z:
98 #if LIBINT2_DEFINED(eri,PB_z)
99  if (not unit_a) PB = inteval->PB_z;
100 #endif
101  WP = inteval->WP_z;
102  break;
103  }
104 
105  const unsigned int ibm1 = INT_CARTINDEX(Lb-1,b[0],b[1]);
106  const unsigned int bm10d0_offset = ibm1 * NdV;
107  const LIBINT2_REALTYPE* src0_ptr = unit_a ? 0 : src0 + bm10d0_offset;
108  const LIBINT2_REALTYPE* src1_ptr = src1 + bm10d0_offset;
109 
110  // if b-2_xyz exists, include (0 b-2_xyz | 0 d)
111  if (b[xyz] > 0) {
112  --b[xyz];
113  const unsigned int ibm2 = INT_CARTINDEX(Lb-2,b[0],b[1]);
114  const unsigned int bm20d0_offset = ibm2 * NdV;
115  ++b[xyz];
116  const LIBINT2_REALTYPE* src2_ptr = src2 + bm20d0_offset;
117  const LIBINT2_REALTYPE* src3_ptr = src3 + bm20d0_offset;
118  const LIBINT2_REALTYPE bxyz = (LIBINT2_REALTYPE)b[xyz];
119 
120  unsigned int dv = 0;
121  for(unsigned int d = 0; d < Nd; ++d) {
122  for(unsigned int v=0; v<veclen; ++v, ++dv) {
123  LIBINT2_REALTYPE value = WP[v] * src1_ptr[dv] + bxyz * inteval->oo2z[v] * (src2_ptr[dv] - inteval->roz[v] * src3_ptr[dv]);
124  if (not unit_a) value += PB[v] * src0_ptr[dv];
125  target[dv] = value;
126  }
127  }
128 #if LIBINT2_FLOP_COUNT
129  inteval->nflops[0] += (unit_a ? 6 : 8) * NdV;
130 #endif
131 
132  }
133  else {
134  unsigned int dv = 0;
135  for(unsigned int d = 0; d < Nd; ++d) {
136  for(unsigned int v=0; v<veclen; ++v, ++dv) {
137  LIBINT2_REALTYPE value = WP[v] * src1_ptr[dv];
138  if (not unit_a) value += PB[v] * src0_ptr[dv];
139  target[dv] = value;
140  }
141  }
142 #if LIBINT2_FLOP_COUNT
143  inteval->nflops[0] += (unit_a ? 1 : 3) * NdV;
144 #endif
145  }
146 
147  {
148  const unsigned int Ndm1 = INT_NCART(Ld-1);
149  const unsigned int Ndm1V = Ndm1 * veclen;
150  const unsigned int bm10dm10_offset = ibm1 * Ndm1V;
151  const LIBINT2_REALTYPE* src4_ptr = src4 + bm10dm10_offset;
152 
153  // loop over d-1 shell and include (0 b-1_xyz | 0 d-1_xyz) to (0 b | 0 d)
154  int dx, dy, dz;
155  FOR_CART(dx, dy, dz, Ld-1)
156 
157  int d[3]; d[0] = dx; d[1] = dy; d[2] = dz;
158  ++d[xyz];
159 
160  const unsigned int dc = INT_CARTINDEX(Ld,d[0],d[1]);
161  const unsigned int dc_offset = dc * veclen;
162  LIBINT2_REALTYPE* tptr = target + dc_offset;
163  const LIBINT2_REALTYPE dxyz = (LIBINT2_REALTYPE)d[xyz];
164  for(unsigned int v=0; v<veclen; ++v) {
165  tptr[v] += dxyz * inteval->oo2ze[v] * src4_ptr[v];
166  }
167 #if LIBINT2_FLOP_COUNT
168  inteval->nflops[0] += 3 * veclen;
169 #endif
170  src4_ptr += veclen;
171 
172  END_FOR_CART
173  }
174 
175  target += NdV;
176 
177  END_FOR_CART
178 
179  }
180 
181  };
182 
183 #if 0
184 
191  template <int Lb, int Ld, bool vectorize> struct OSVRR_sx_sx<1,Lb,Ld,vectorize> {
192 
193  static void compute(const Libint_t* inteval,
194  LIBINT2_REALTYPE* target,
195  const LIBINT2_REALTYPE* src0,
196  const LIBINT2_REALTYPE* src1,
197  const LIBINT2_REALTYPE* src2,
198  const LIBINT2_REALTYPE* src3,
199  const LIBINT2_REALTYPE* src4) {
200 
201  // works for (sp|sd) and higher
202  if (Lb < 1 || Ld < 2)
203  abort();
204 
205  // ALGORITHM
206  // loop over functions in d
207  // decide in which direction can build (x, y, or z)
208  // decide whether d-2 exists
209  // loop over functions in b
210  // include contributions from (0 b | 0 d-1)^(m), (0 b | 0 d-1)^(m+1),
211  // and possibly (0 b | 0 d-2)^(m), (0 b | 0 d-2)^(m+1) for each b
212  // end of loop over b
213  // loop over b-1
214  // include contribution from (0 b-1 | 0 d-1)^(m+1)
215  // end of loop over b-1
216  // end of loop over d
217 
218  const unsigned int veclen = vectorize ? inteval->veclen : 1;
219 
220  const unsigned int Nb = INT_NCART(Lb);
221  const unsigned int Nd = INT_NCART(Ld);
222  const unsigned int Ndv = Nd * veclen;
223  const unsigned int Ndm1 = INT_NCART(Ld-1);
224  const unsigned int Ndm1v = Ndm1 * veclen;
225  const unsigned int Ndm2 = INT_NCART(Ld-2);
226  const unsigned int Ndm2v = Ndm2 * veclen;
227 
228  int dx, dy, dz;
229  int id = 0;
230  FOR_CART(dx, dy, dz, Ld)
231 
232  int d[3]; d[0] = dx; d[1] = dy; d[2] = dz;
233 
234  enum XYZ {x=0, y=1, z=2};
235  // Build along x, if possible
236  XYZ xyz = z;
237  if (dy != 0) xyz = y;
238  if (dx != 0) xyz = x;
239  --d[xyz];
240 
241  // redirect
242  const LIBINT2_REALTYPE *QD, *WQ;
243  switch(xyz) {
244  case x:
245  QD = inteval->QD_x;
246  WQ = inteval->WQ_x;
247  break;
248  case y:
249  QD = inteval->QD_y;
250  WQ = inteval->WQ_y;
251  break;
252  case z:
253  QD = inteval->QD_z;
254  WQ = inteval->WQ_z;
255  break;
256  }
257 
258  const unsigned int idm1 = INT_CARTINDEX(Ld-1,d[0],d[1]);
259  const unsigned int d0_offset = id * veclen;
260  const unsigned int dm10_offset = idm1 * veclen;
261  LIBINT2_REALTYPE* target_ptr = target + d0_offset;
262  const LIBINT2_REALTYPE* src0_ptr = src0 + dm10_offset;
263  const LIBINT2_REALTYPE* src1_ptr = src1 + dm10_offset;
264 
265  // if d-2_xyz exists, include (0 b | 0 d-2_xyz)
266  if (d[xyz] > 0) {
267  --d[xyz];
268  const unsigned int idm2 = INT_CARTINDEX(Ld-2,d[0],d[1]);
269  const unsigned int dm20_offset = idm2 * veclen;
270  ++d[xyz];
271  const LIBINT2_REALTYPE* src2_ptr = src2 + dm20_offset;
272  const LIBINT2_REALTYPE* src3_ptr = src3 + dm20_offset;
273  const LIBINT2_REALTYPE dxyz = (LIBINT2_REALTYPE)d[xyz];
274 
275  for(unsigned int b = 0; b < Nb; ++b) {
276  for(unsigned int v=0; v<veclen; ++v) {
277  target_ptr[v] = QD[v] * src0_ptr[v] + WQ[v] * src1_ptr[v]
278  + dxyz * inteval->oo2e[v] * (src2_ptr[v] - inteval->roe[v] * src3_ptr[v]);
279  }
280  target_ptr += Ndv;
281  src0_ptr += Ndm1v;
282  src1_ptr += Ndm1v;
283  src2_ptr += Ndm2v;
284  src3_ptr += Ndm2v;
285  }
286 #if LIBINT2_FLOP_COUNT
287  inteval->nflops[0] += 8 * Nb * veclen;
288 #endif
289 
290  }
291  else {
292  for(unsigned int b = 0; b < Nb; ++b) {
293  for(unsigned int v=0; v<veclen; ++v) {
294  target_ptr[v] = QD[v] * src0_ptr[v] + WQ[v] * src1_ptr[v];
295  }
296  target_ptr += Ndv;
297  src0_ptr += Ndm1v;
298  src1_ptr += Ndm1v;
299  }
300 #if LIBINT2_FLOP_COUNT
301  inteval->nflops[0] += 3 * Nb * veclen;
302 #endif
303  }
304 
305  {
306  const LIBINT2_REALTYPE* src4_ptr = src4 + dm10_offset;
307 
308  // loop over b-1 shell and include (0 b-1_xyz | 0 d-1_xyz) to (0 b | 0 d)
309  int bx, by, bz;
310  FOR_CART(bx, by, bz, Lb-1)
311 
312  int b[3]; b[0] = bx; b[1] = by; b[2] = bz;
313  ++b[xyz];
314 
315  const unsigned int ib = INT_CARTINDEX(Lb,b[0],b[1]);
316  const unsigned int b0d0_offset = ib * Ndv + d0_offset;
317  LIBINT2_REALTYPE* target_ptr = target + b0d0_offset;
318  const LIBINT2_REALTYPE bxyz = (LIBINT2_REALTYPE)b[xyz];
319  for(unsigned int v=0; v<veclen; ++v) {
320  target_ptr[v] += bxyz * inteval->oo2ze[v] * src4_ptr[v];
321  }
322 #if LIBINT2_FLOP_COUNT
323  inteval->nflops[0] += 3 * veclen;
324 #endif
325  src4_ptr += Ndm1v;
326 
327  END_FOR_CART // end of loop over b
328  }
329 
330  ++id;
331 
332  END_FOR_CART // end of loop over d
333 
334  }
335 
336  };
337 #endif
338 
339  template <int part, int Lb, int Ld, bool vectorize> struct OSAVRR_sx_sx {
340  static void compute(const Libint_t* inteval,
341  LIBINT2_REALTYPE* target,
342  const LIBINT2_REALTYPE* src1,
343  const LIBINT2_REALTYPE* src4);
344  };
345 
350  template <int Lb, int Ld,
351  bool vectorize> struct OSAVRR_sx_sx<0,Lb,Ld,vectorize> {
352 
353  static void compute(const Libint_t* inteval,
354  LIBINT2_REALTYPE* target,
355  const LIBINT2_REALTYPE* src1,
356  const LIBINT2_REALTYPE* src4) {
357 
358  // works for (sd|sp) and higher
359  assert(not (Lb < 2 || Ld < 1));
360 
361  const unsigned int veclen = vectorize ? inteval->veclen : 1;
362 
363  const unsigned int Nd = INT_NCART(Ld);
364  const unsigned int NdV = Nd * veclen;
365 
366  int bx, by, bz;
367  FOR_CART(bx, by, bz, Lb)
368 
369  int b[3]; b[0] = bx; b[1] = by; b[2] = bz;
370 
371  enum XYZ {x=0, y=1, z=2};
372  // Build along x, if possible
373  XYZ xyz = z;
374  if (by != 0) xyz = y;
375  if (bx != 0) xyz = x;
376  --b[xyz];
377 
378  // redirect
379  const LIBINT2_REALTYPE *WP;
380  switch(xyz) {
381  case x:
382  WP = inteval->WP_x;
383  break;
384  case y:
385  WP = inteval->WP_y;
386  break;
387  case z:
388  WP = inteval->WP_z;
389  break;
390  }
391 
392  const unsigned int ibm1 = INT_CARTINDEX(Lb-1,b[0],b[1]);
393  const unsigned int bm10d0_offset = ibm1 * NdV;
394  const LIBINT2_REALTYPE* src1_ptr = src1 + bm10d0_offset;
395 
396  {
397  unsigned int dv = 0;
398  for(unsigned int d = 0; d < Nd; ++d) {
399  for(unsigned int v=0; v<veclen; ++v, ++dv) {
400  target[dv] = WP[v] * src1_ptr[dv];
401  }
402  }
403 #if LIBINT2_FLOP_COUNT
404  inteval->nflops[0] += NdV;
405 #endif
406  }
407 
408  {
409  const unsigned int Ndm1 = INT_NCART(Ld-1);
410  const unsigned int Ndm1V = Ndm1 * veclen;
411  const unsigned int bm10dm10_offset = ibm1 * Ndm1V;
412  const LIBINT2_REALTYPE* src4_ptr = src4 + bm10dm10_offset;
413 
414  // loop over d-1 shell and include (0 b-1_xyz | 0 d-1_xyz) to (0 b | 0 d)
415  int dx, dy, dz;
416  FOR_CART(dx, dy, dz, Ld-1)
417 
418  int d[3]; d[0] = dx; d[1] = dy; d[2] = dz;
419  ++d[xyz];
420 
421  const unsigned int dc = INT_CARTINDEX(Ld,d[0],d[1]);
422  const unsigned int dc_offset = dc * veclen;
423  LIBINT2_REALTYPE* tptr = target + dc_offset;
424  const LIBINT2_REALTYPE dxyz = (LIBINT2_REALTYPE)d[xyz];
425  for(unsigned int v=0; v<veclen; ++v) {
426  tptr[v] += dxyz * inteval->oo2ze[v] * src4_ptr[v];
427  }
428 #if LIBINT2_FLOP_COUNT
429  inteval->nflops[0] += 3 * veclen;
430 #endif
431  src4_ptr += veclen;
432 
433  END_FOR_CART
434  }
435 
436  target += NdV;
437 
438  END_FOR_CART
439 
440  }
441 
442  };
443 
444 
445 };
446 
447 #endif // header guard
448 
Defaults definitions for various parameters assumed by Libint.
Definition: algebra.cc:23
Definition: OSVRR_sx_sx.h:339
Definition: test_eri_rys.cc:46
Definition: OSVRR_sx_sx.h:33