19 #ifndef _libint2_src_lib_libint_solidharmonics_h_ 20 #define _libint2_src_lib_libint_solidharmonics_h_ 22 #include <libint2/cxxstd.h> 23 #if LIBINT2_CPLUSPLUS_STD < 2011 24 # error "The simple Libint API requires C++11 support" 30 #include <libint2/shell.h> 31 #include <libint2/cgshell_ordering.h> 34 template <
typename Int>
35 signed char parity(Int i) {
42 namespace solidharmonics {
48 template <
typename Real>
51 typedef ::libint2::real_t real_t;
56 assert(l <= std::numeric_limits<signed char>::max());
61 values_(std::move(other.values_)),
62 row_offset_(std::move(other.row_offset_)),
63 colidx_(std::move(other.colidx_)),
69 void init(
unsigned char l) {
70 assert(l <= std::numeric_limits<signed char>::max());
76 static std::vector<SolidHarmonicsCoefficients> shg_coefs(SolidHarmonicsCoefficients::CtorHelperIter(0),
77 SolidHarmonicsCoefficients::CtorHelperIter(LIBINT_MAX_AM+1));
78 assert(l <= LIBINT_MAX_AM);
84 return &values_[0] + row_offset_[r];
87 const unsigned char*
row_idx(
size_t r)
const {
88 return &colidx_[0] + row_offset_[r];
91 unsigned char nnz(
size_t r)
const {
92 return row_offset_[r+1] - row_offset_[r];
96 std::vector<Real> values_;
97 std::vector<unsigned short> row_offset_;
98 std::vector<unsigned char> colidx_;
102 const unsigned short npure = 2*l_ + 1;
103 const unsigned short ncart = (l_ + 1) * (l_ + 2) / 2;
104 std::vector<Real> full_coeff(npure * ncart);
106 for(
signed char m=-l_; m<=l_; ++m) {
107 const signed char pure_idx = m + l_;
108 signed char cart_idx = 0;
109 signed char lx, ly, lz;
110 FOR_CART(lx, ly, lz, l_)
111 full_coeff[pure_idx * ncart + cart_idx] = coeff(l_, m, lx, ly, lz);
119 for(
size_t i=0; i!=full_coeff.size(); ++i)
120 nnz += full_coeff[i] == 0.0 ? 0 : 1;
124 row_offset_.resize(npure+1);
127 unsigned short pc = 0;
128 unsigned short cnt = 0;
129 for(
unsigned short p=0; p!=npure; ++p) {
130 row_offset_[p] = cnt;
131 for(
unsigned short c=0; c!=ncart; ++c, ++pc) {
132 if (full_coeff[pc] != 0.0) {
133 values_[cnt] = full_coeff[pc];
139 row_offset_[npure] = cnt;
149 static double coeff(
int l,
int m,
int lx,
int ly,
int lz) {
150 using libint2::math::fac;
151 using libint2::math::df_Kminus1;
152 using libint2::math::bc;
154 auto abs_m = std::abs(m);
155 if ((lx + ly - abs_m)%2)
158 auto j = (lx + ly - abs_m)/2;
165 auto comp = (m >= 0) ? 1 : -1;
168 if (comp != parity(abs(i)))
172 Real pfac = sqrt( ((Real(fac[2*lx]*fac[2*ly]*fac[2*lz]))/fac[2*l]) *
173 ((Real(fac[l-abs_m]))/(fac[l])) *
174 (Real(1)/fac[l+abs_m]) *
175 (Real(1)/(fac[lx]*fac[ly]*fac[lz]))
180 pfac *= parity((i-1)/2);
185 auto i_max = (l-abs_m)/2;
187 for(
auto i=i_min;i<=i_max;i++) {
188 Real pfac1 = bc(l,i)*bc(i,j);
189 pfac1 *= (Real(parity(i)*fac[2*(l-i)])/fac[l-abs_m-2*i]);
191 const int k_min = std::max((lx-abs_m)/2,0);
192 const int k_max = std::min(j,lx/2);
193 for(
int k=k_min;k<=k_max;k++) {
195 sum1 += bc(j,k)*bc(abs_m,lx-2*k)*parity(k);
199 sum *= sqrt(Real(df_Kminus1[2*l])/(df_Kminus1[2*lx]*df_Kminus1[2*ly]*df_Kminus1[2*lz]));
201 Real result = (m == 0) ? pfac*sum : M_SQRT2*pfac*sum;
205 struct CtorHelperIter :
public std::iterator<std::input_iterator_tag, SolidHarmonicsCoefficients> {
207 using typename std::iterator<std::input_iterator_tag, SolidHarmonicsCoefficients>::value_type;
209 CtorHelperIter() =
default;
210 CtorHelperIter(
unsigned int l) : l_(l) {}
211 CtorHelperIter(
const CtorHelperIter&) =
default;
212 CtorHelperIter& operator=(
const CtorHelperIter& rhs) { l_ = rhs.l_;
return *
this; }
214 CtorHelperIter& operator++() { ++l_;
return *
this; }
215 CtorHelperIter& operator--() { assert(l_ > 0); --l_;
return *
this; }
218 return value_type(l_);
220 bool operator==(
const CtorHelperIter& rhs)
const {
223 bool operator!=(
const CtorHelperIter& rhs)
const {
224 return not (*
this == rhs);
232 template <
typename Real>
233 void transform_first(
size_t l,
size_t n2,
const Real *src, Real *tgt)
237 const auto n = 2*l+1;
238 memset(tgt,0,n*n2*
sizeof(Real));
241 for(
size_t s=0; s!=n; ++s) {
242 const auto nc_s = coefs.nnz(s);
243 const auto* c_idxs = coefs.row_idx(s);
244 const auto* c_vals = coefs.row_values(s);
246 const auto tgt_blk_s_offset = s * n2;
248 for(
size_t ic=0; ic!=nc_s; ++ic) {
249 const auto c = c_idxs[ic];
250 const auto s_c_coeff = c_vals[ic];
252 auto src_blk_s = src + c * n2;
253 auto tgt_blk_s = tgt + tgt_blk_s_offset;
256 for(
size_t i2=0; i2!=n2; ++i2, ++src_blk_s, ++tgt_blk_s) {
257 *tgt_blk_s += s_c_coeff * *src_blk_s;
265 template <
typename Real>
266 void transform_first2(
int l1,
int l2,
size_t inner_dim,
const Real* source_blk, Real* target_blk) {
270 const auto ncart1 = (l1+1)*(l1+2)/2;
271 const auto ncart2 = (l2+1)*(l2+2)/2;
272 const auto npure1 = 2*l1+1;
273 const auto npure2 = 2*l2+1;
274 const auto ncart2inner = ncart2 * inner_dim;
275 const auto npure2inner = npure2 * inner_dim;
276 memset(target_blk, 0, npure1*npure2inner*
sizeof(real_t));
279 const size_t inner_blk_size = 8;
280 const size_t nblks = (inner_dim+inner_blk_size-1)/inner_blk_size;
281 for(
size_t blk=0; blk!=nblks; ++blk) {
282 const auto blk_begin = blk * inner_blk_size;
283 const auto blk_end = std::min(blk_begin + inner_blk_size,inner_dim);
284 const auto blk_size = blk_end - blk_begin;
287 for(
size_t s1=0; s1!=npure1; ++s1) {
288 const auto nc1 = coefs1.nnz(s1);
289 const auto* c1_idxs = coefs1.row_idx(s1);
290 const auto* c1_vals = coefs1.row_values(s1);
292 auto target_blk_s1 = target_blk + s1 * npure2inner + blk_begin;
295 for(
size_t s2=0; s2!=npure2; ++s2) {
296 const auto nc2 = coefs2.nnz(s2);
297 const auto* c2_idxs = coefs2.row_idx(s2);
298 const auto* c2_vals = coefs2.row_values(s2);
299 const auto s2inner = s2 * inner_dim;
300 const auto target_blk_s1_blk_begin = target_blk_s1 + s2inner;
302 for(
size_t ic1=0; ic1!=nc1; ++ic1) {
303 auto c1 = c1_idxs[ic1];
304 auto s1_c1_coeff = c1_vals[ic1];
306 auto source_blk_c1 = source_blk + c1 * ncart2inner + blk_begin;
308 for(
size_t ic2=0; ic2!=nc2; ++ic2) {
309 auto c2 = c2_idxs[ic2];
310 auto s2_c2_coeff = c2_vals[ic2];
311 const auto c2inner = c2 * inner_dim;
313 const auto coeff = s1_c1_coeff * s2_c2_coeff;
314 const auto source_blk_c1_blk_begin = source_blk_c1 + c2inner;
315 for(
auto b=0; b<blk_size; ++b)
316 target_blk_s1_blk_begin[b] += source_blk_c1_blk_begin[b] * coeff;
330 template <
typename Real>
331 void transform_inner(
size_t n1,
size_t l,
size_t n2,
const Real *src, Real *tgt)
335 const auto nc = (l+1)*(l+2)/2;
336 const auto n = 2*l+1;
337 const auto nc_n2 = nc * n2;
338 const auto n_n2 = n * n2;
339 memset(tgt,0,n1*n_n2*
sizeof(Real));
342 for(
size_t s=0; s!=n; ++s) {
343 const auto nc_s = coefs.nnz(s);
344 const auto* c_idxs = coefs.row_idx(s);
345 const auto* c_vals = coefs.row_values(s);
347 const auto tgt_blk_s_offset = s * n2;
349 for(
size_t ic=0; ic!=nc_s; ++ic) {
350 const auto c = c_idxs[ic];
351 const auto s_c_coeff = c_vals[ic];
353 auto src_blk_s = src + c * n2;
354 auto tgt_blk_s = tgt + tgt_blk_s_offset;
357 for(
size_t i1=0; i1!=n1; ++i1, src_blk_s+=nc_n2, tgt_blk_s+=n_n2) {
358 for(
size_t i2=0; i2!=n2; ++i2) {
359 tgt_blk_s[i2] += s_c_coeff * src_blk_s[i2];
368 template <
typename Real>
369 void transform_last(
size_t n1,
size_t l,
const Real *src, Real *tgt)
373 const auto nc = (l+1)*(l+2)/2;
374 const auto n = 2*l+1;
375 memset(tgt,0,n1*n*
sizeof(Real));
378 for(
size_t s=0; s!=n; ++s) {
379 const auto nc_s = coefs.nnz(s);
380 const auto* c_idxs = coefs.row_idx(s);
381 const auto* c_vals = coefs.row_values(s);
383 const auto tgt_blk_s_offset = s;
385 for(
size_t ic=0; ic!=nc_s; ++ic) {
386 const auto c = c_idxs[ic];
387 const auto s_c_coeff = c_vals[ic];
389 auto src_blk_s = src + c;
390 auto tgt_blk_s = tgt + tgt_blk_s_offset;
393 for(
size_t i1=0; i1!=n1; ++i1, src_blk_s+=nc, tgt_blk_s+=n) {
394 *tgt_blk_s += s_c_coeff * *src_blk_s;
402 template <
typename Real>
403 void tform_last2(
size_t n1,
int l_row,
int l_col,
const Real* source_blk, Real* target_blk) {
407 const auto ncart_row = (l_row+1)*(l_row+2)/2;
408 const auto ncart_col = (l_col+1)*(l_col+2)/2;
409 const auto ncart = ncart_row * ncart_col;
410 const auto npure_row = 2*l_row+1;
411 const auto npure_col = 2*l_col+1;
412 const auto npure = npure_row * npure_col;
413 memset(target_blk, 0, n1*npure*
sizeof(Real));
415 for(
size_t i1=0; i1!=n1; ++i1, source_blk+=ncart, target_blk+=npure) {
417 for(
size_t s1=0; s1!=npure_row; ++s1) {
418 const auto nc1 = coefs_row.nnz(s1);
419 const auto* c1_idxs = coefs_row.row_idx(s1);
420 const auto* c1_vals = coefs_row.row_values(s1);
422 auto target_blk_s1 = target_blk + s1 * npure_col;
425 for(
size_t s2=0; s2!=npure_col; ++s2) {
426 const auto nc2 = coefs_col.nnz(s2);
427 const auto* c2_idxs = coefs_col.row_idx(s2);
428 const auto* c2_vals = coefs_col.row_values(s2);
430 for(
size_t ic1=0; ic1!=nc1; ++ic1) {
431 auto c1 = c1_idxs[ic1];
432 auto s1_c1_coeff = c1_vals[ic1];
434 auto source_blk_c1 = source_blk + c1 * ncart_col;
436 for(
size_t ic2=0; ic2!=nc2; ++ic2) {
437 auto c2 = c2_idxs[ic2];
438 auto s2_c2_coeff = c2_vals[ic2];
440 target_blk_s1[s2] += source_blk_c1[c2] * s1_c1_coeff * s2_c2_coeff;
452 template <
typename Real>
453 void tform(
int l_row,
int l_col,
const Real* source_blk, Real* target_blk) {
457 const auto ncart_row = (l_row+1)*(l_row+2)/2;
458 const auto ncart_col = (l_col+1)*(l_col+2)/2;
459 const auto npure_row = 2*l_row+1;
460 const auto npure_col = 2*l_col+1;
461 memset(target_blk, 0, npure_row*npure_col*
sizeof(real_t));
464 for(
size_t s1=0; s1!=npure_row; ++s1) {
465 const auto nc1 = coefs_row.nnz(s1);
466 const auto* c1_idxs = coefs_row.row_idx(s1);
467 const auto* c1_vals = coefs_row.row_values(s1);
469 auto target_blk_s1 = target_blk + s1 * npure_col;
472 for(
size_t s2=0; s2!=npure_col; ++s2) {
473 const auto nc2 = coefs_col.nnz(s2);
474 const auto* c2_idxs = coefs_col.row_idx(s2);
475 const auto* c2_vals = coefs_col.row_values(s2);
477 for(
size_t ic1=0; ic1!=nc1; ++ic1) {
478 auto c1 = c1_idxs[ic1];
479 auto s1_c1_coeff = c1_vals[ic1];
481 auto source_blk_c1 = source_blk + c1 * ncart_col;
483 for(
size_t ic2=0; ic2!=nc2; ++ic2) {
484 auto c2 = c2_idxs[ic2];
485 auto s2_c2_coeff = c2_vals[ic2];
487 target_blk_s1[s2] += source_blk_c1[c2] * s1_c1_coeff * s2_c2_coeff;
498 template <
typename Real>
499 void tform_cols(
size_t nrow,
int l_col,
const Real* source_blk, Real* target_blk) {
500 return transform_last(nrow, l_col, source_blk, target_blk);
503 const auto ncart_col = (l_col+1)*(l_col+2)/2;
504 const auto npure_col = 2*l_col+1;
507 for(
size_t r1=0; r1!=nrow; ++r1) {
509 auto source_blk_r1 = source_blk + r1 * ncart_col;
510 auto target_blk_r1 = target_blk + r1 * npure_col;
513 for(
size_t s2=0; s2!=npure_col; ++s2) {
514 const auto nc2 = coefs_col.nnz(s2);
515 const auto* c2_idxs = coefs_col.row_idx(s2);
516 const auto* c2_vals = coefs_col.row_values(s2);
518 Real r1_s2_value = 0.0;
520 for(
size_t ic2=0; ic2!=nc2; ++ic2) {
521 auto c2 = c2_idxs[ic2];
522 auto s2_c2_coeff = c2_vals[ic2];
524 r1_s2_value += source_blk_r1[c2] * s2_c2_coeff;
528 target_blk_r1[s2] = r1_s2_value;
537 template <
typename Real>
538 void tform_rows(
int l_row,
size_t ncol,
const Real* source_blk, Real* target_blk) {
539 return transform_first(l_row, ncol, source_blk, target_blk);
542 const auto ncart_row = (l_row+1)*(l_row+2)/2;
543 const auto npure_row = 2*l_row+1;
546 for(
size_t s1=0; s1!=npure_row; ++s1) {
547 const auto nc1 = coefs_row.nnz(s1);
548 const auto* c1_idxs = coefs_row.row_idx(s1);
549 const auto* c1_vals = coefs_row.row_values(s1);
551 auto target_blk_s1 = target_blk + s1 * ncol;
554 for(
size_t c2=0; c2!=ncol; ++c2) {
556 Real s1_c2_value = 0.0;
557 auto source_blk_c2_offset = source_blk + c2;
559 for(
size_t ic1=0; ic1!=nc1; ++ic1) {
560 auto c1 = c1_idxs[ic1];
561 auto s1_c1_coeff = c1_vals[ic1];
563 s1_c2_value += source_blk_c2_offset[c1 * ncol] * s1_c1_coeff;
567 target_blk_s1[c2] = s1_c2_value;
575 template <
typename Real,
typename Shell>
576 void tform(
const Shell& shell_row,
const Shell& shell_col,
const Real* source_blk, Real* target_blk) {
577 const auto trow = shell_row.pure;
578 const auto tcol = shell_col.pure;
582 Real localscratch[500];
583 tform_cols(shell_row.cartesian_size(), shell_col.l, source_blk, &localscratch[0]);
584 tform_rows(shell_row.l, shell_col.size(), &localscratch[0], target_blk);
587 tform_rows(shell_row.l, shell_col.cartesian_size(), source_blk, target_blk);
590 tform_cols(shell_row.cartesian_size(), shell_col.l, source_blk, target_blk);
unsigned char nnz(size_t r) const
number of nonzero elements in row r
Definition: solidharmonics.h:91
generally-contracted Solid-Harmonic/Cartesion Gaussian Shell
Definition: shell.h:78
Defaults definitions for various parameters assumed by Libint.
Definition: algebra.cc:23
SafePtr< CTimeEntity< typename ProductType< T, U >::result > > operator*(const SafePtr< CTimeEntity< T > > &A, const SafePtr< CTimeEntity< U > > &B)
Creates product A*B.
Definition: entity.h:277
const Real * row_values(size_t r) const
returns ptr to row values
Definition: solidharmonics.h:83
const unsigned char * row_idx(size_t r) const
returns ptr to row indices
Definition: solidharmonics.h:87
Transformation coefficients from unnormalized Cartesian Gaussians (rows) to unit-normalized real Soli...
Definition: solidharmonics.h:49