LIBINT  2.1.0-stable
VRR_GTG_1d_xx_xx_vec.h
1 /*
2  * This file is a part of Libint.
3  * Copyright (C) 2004-2014 Edward F. Valeev
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Library General Public License, version 2,
7  * as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU Library General Public License
15  * along with this program. If not, see http://www.gnu.org/licenses/.
16  *
17  */
18 
19 #ifndef _libint2_src_lib_libint_vrrgtg1dxxxx_h_
20 #define _libint2_src_lib_libint_vrrgtg1dxxxx_h_
21 //#include "../../../../SIMD_wrapped_vector/template/simd_wrapped_vector.hpp"
22 #include <cstdlib>
23 #include <cassert>
24 #include <libint2.h>
25 #include <util_types.h>
26 
27 
28 // constexpr unsigned int am_tot = 20;
29 // constexpr int npts = am_tot/2 + 1;
30 
31 namespace libint2 {
32 
40  template <unsigned int CartesianAxis, int La, int Lb, int Lc, int Ld, bool vectorize>
41  struct VRR_GTG_1d_xx_xx {
42 
43  static void compute(const Libint_t* inteval,
44  VectorSIMD<double,npts>* target,
45  VectorSIMD<double,npts>* src0) {
46 
47  enum XYZ {x=0, y=1, z=2};
48  assert(CartesianAxis == x || CartesianAxis == y || CartesianAxis == z);
49  //assert(vectorize == false);
50 
51  const unsigned int veclen = vectorize ? inteval->veclen : 1;
52 
53  // corner case: (00|00)
54  if (La == 0 && Lb == 0 && Lc == 0 && Ld == 0) {
55  for (unsigned int v=0; v!=veclen; ++v)
56  target[v] = src0[v];
57  return;
58  }
59 
60  //---------------------------------------------
61  // Part (1): build (a+b 0|c+d 0)
62  //---------------------------------------------
63 
64  VectorSIMD<double,npts> apb_0_GTG_cpd_0[La+Lb+1][Lc+Ld+1];
65  apb_0_GTG_cpd_0[0][0] = src0[0];
66 
67  const VectorSIMD<double,npts> *pfac0_0, *pfac0_1;
68  const VectorSIMD<double,npts> *pfac1_0 = inteval->R12kG12_pfac1_0;
69  const VectorSIMD<double,npts> *pfac1_1 = inteval->R12kG12_pfac1_1;
70  const VectorSIMD<double,npts> *pfac2 = inteval->R12kG12_pfac2;
71  switch (CartesianAxis) {
72  case x:
73  pfac0_0 = inteval->R12kG12_pfac0_0_x;
74  pfac0_1 = inteval->R12kG12_pfac0_1_x;
75  break;
76  case y:
77  pfac0_0 = inteval->R12kG12_pfac0_0_y;
78  pfac0_1 = inteval->R12kG12_pfac0_1_y;
79  break;
80  case z:
81  pfac0_0 = inteval->R12kG12_pfac0_0_z;
82  pfac0_1 = inteval->R12kG12_pfac0_1_z;
83  break;
84  default: assert(false);
85  }
86 
87  // build (0 0|1 0)
88  if (Lc+Ld > 0) {
89  apb_0_GTG_cpd_0[0][1] = pfac0_1[0] * apb_0_GTG_cpd_0[0][0];
90 #if LIBiINT2_FLOP_COUNT
91  inteval->nflops[0] += 1;
92 #endif
93  }
94 
95  // build (0 0|c+d 0)
96  if (Lc+Ld > 1) {
97  for(int c_plus_d=1; c_plus_d!=Lc+Ld; ++c_plus_d) {
98  apb_0_GTG_cpd_0[0][c_plus_d+1] = pfac0_1[0] * apb_0_GTG_cpd_0[0][c_plus_d] +
99  c_plus_d * pfac1_1[0] * apb_0_GTG_cpd_0[0][c_plus_d-1];
100  }
101 #if LIBINT2_FLOP_COUNT
102  inteval->nflops[0] += 4*(Lc+Ld-1);
103 #endif
104  }
105 
106  // build (1 0|0 0)
107  if (La+Lb > 0) {
108  apb_0_GTG_cpd_0[1][0] = pfac0_0[0] * apb_0_GTG_cpd_0[0][0];
109 #if LIBINT2_FLOP_COUNT
110  inteval->nflops[0] += 1;
111 #endif
112  }
113 
114  // build (a+b 0|0 0)
115  if (La+Lb > 1) {
116  for(int a_plus_b=1; a_plus_b!=La+Lb; ++a_plus_b) {
117  apb_0_GTG_cpd_0[a_plus_b+1][0] = pfac0_0[0] * apb_0_GTG_cpd_0[a_plus_b][0] +
118  a_plus_b * pfac1_0[0] * apb_0_GTG_cpd_0[a_plus_b-1][0];
119  }
120 #if LIBINT2_FLOP_COUNT
121  inteval->nflops[0] += 4*(La+Lb-1);
122 #endif
123  }
124 
125  // build (1 0|c+d 0)
126  if (La+Lb > 0 && Lc+Ld > 0) {
127  for(int c_plus_d=1; c_plus_d<=Lc+Ld; ++c_plus_d) {
128  apb_0_GTG_cpd_0[1][c_plus_d] = pfac0_0[0] * apb_0_GTG_cpd_0[0][c_plus_d] +
129  c_plus_d * pfac2[0] * apb_0_GTG_cpd_0[0][c_plus_d-1];
130  }
131 #if LIBINT2_FLOP_COUNT
132  inteval->nflops[0] += 4*(Lc+Ld-1);
133 #endif
134  }
135 
136  // build (a+b 0|c+d 0)
137  if (La+Lb > 1 && Lc+Ld > 0) {
138  for(int a_plus_b=1; a_plus_b!=La+Lb; ++a_plus_b) {
139  for(int c_plus_d=1; c_plus_d<=Lc+Ld; ++c_plus_d) {
140  apb_0_GTG_cpd_0[a_plus_b+1][c_plus_d] = pfac0_0[0] * apb_0_GTG_cpd_0[a_plus_b][c_plus_d] +
141  a_plus_b * pfac1_0[0] * apb_0_GTG_cpd_0[a_plus_b-1][c_plus_d] +
142  c_plus_d * pfac2[0] * apb_0_GTG_cpd_0[a_plus_b][c_plus_d-1];
143  }
144  }
145 #if LIBINT2_FLOP_COUNT
146  inteval->nflops[0] += 7*(La+Lb-1)*(Lc+Ld-1);
147 #endif
148  }
149 
150  //---------------------------------------------
151  // Part (2): build (a b|c+d 0)
152  //---------------------------------------------
153 
154  double AB[1];
155  switch (CartesianAxis) {
156  case x:
157  std::cout<<"printing before segfault"<<std::endl;
158  AB[0] = inteval->AB_x[0];
159  break;
160  case y:
161  AB[0] = inteval->AB_y[0];
162  break;
163  case z:
164  AB[0] = inteval->AB_z[0];
165  break;
166  default: assert(false);
167  }
168 
169  VectorSIMD<double,npts> a_b_GTG_cpd_0[La+1][Lb+1][Lc+Ld+1];
170  for(int c_plus_d=0; c_plus_d<=Lc+Ld; ++c_plus_d) {
171  // copy (a+b 0| to a local 0,a+b buffer
172  VectorSIMD<double,npts> b_a_GTG[La+Lb+1][La+Lb+1];
173  for(int a_plus_b=0; a_plus_b<=La+Lb; ++a_plus_b) {
174  b_a_GTG[0][a_plus_b] = apb_0_GTG_cpd_0[a_plus_b][c_plus_d];
175  }
176  // use HRR to compute b,a
177  for(int b=1; b<=Lb; ++b) {
178  for(int a=0; a<=La+Lb-b; ++a) {
179  b_a_GTG[b][a] = b_a_GTG[b-1][a+1] + AB[0] * b_a_GTG[b-1][a];
180  }
181 #if LIBINT2_FLOP_COUNT
182  inteval->nflops[0] += 2 * (La+Lb-b+1);
183 #endif
184  }
185  // copy b,a to (a b|
186  for(int b=0; b<=Lb; ++b) {
187  for(int a=0; a<=La; ++a) {
188  a_b_GTG_cpd_0[a][b][c_plus_d] = b_a_GTG[b][a];
189  }
190  }
191  }
192 
193  //---------------------------------------------
194  // Part (3): build (a b|c d)
195  //---------------------------------------------
196 
197  double CD[1];
198  switch (CartesianAxis) {
199  case x:
200  CD[0] = inteval->CD_x[0];
201  break;
202  case y:
203  CD[0] = inteval->CD_y[0];
204  break;
205  case z:
206  CD[0] = inteval->CD_z[0];
207  break;
208  default: assert(false);
209  }
210 
211  VectorSIMD<double,npts>* target_a_b_blk_ptr = target;
212  const int Nd = (Ld+1);
213  const int Ncd = (Lc+1)*Nd;
214  for(int a=0; a<=La; ++a) {
215  for(int b=0; b<=Lb; ++b, target_a_b_blk_ptr+=Ncd) {
216  // copy |c+d 0) to a local 0,c+d buffer
217  VectorSIMD<double,npts> d_c_GTG[Lc+Ld+1][Lc+Ld+1];
218  for(int c_plus_d=0; c_plus_d<=Lc+Ld; ++c_plus_d) {
219  d_c_GTG[0][c_plus_d] = a_b_GTG_cpd_0[a][b][c_plus_d];
220  }
221  // use HRR to compute d,c
222  for(int d=1; d<=Ld; ++d) {
223  for(int c=0; c<=Lc+Ld-d; ++c) {
224  d_c_GTG[d][c] = d_c_GTG[d-1][c+1] + CD[0] * d_c_GTG[d-1][c];
225  }
226 #if LIBINT2_FLOP_COUNT
227  inteval->nflops[0] += 2 * (Lc+Ld-d+1);
228 #endif
229  }
230  // copy d,c to |c d)
231  for(int d=0; d<=Ld; ++d) {
232  for(int c=0, cd=d; c<=Lc; ++c, cd+=Nd) {
233  target_a_b_blk_ptr[cd] = d_c_GTG[d][c];
234  }
235  }
236  }
237  }
238 
239  // done
240  }
241 
242  };
243 
244 };
245 
246 #endif // header guard
247 
Defaults definitions for various parameters assumed by Libint.
Definition: algebra.cc:23
Definition: test_eri_rys.cc:46