22 #ifndef _libint2_src_lib_libint_boys_h_ 23 #define _libint2_src_lib_libint_boys_h_ 25 #if defined(__cplusplus) 31 #include <libint2/vector.h> 36 #include <type_traits> 39 #include <libint2/cxxstd.h> 40 #if LIBINT2_CPLUSPLUS_STD >= 2011 44 #if HAVE_LAPACK // use F77-type interface for now, switch to LAPACKE later 45 extern "C" void dgesv_(
const int* n,
46 const int* nrhs,
double* A,
const int* lda,
47 int* ipiv,
double* b,
const int* ldb,
54 template<
typename Real>
61 for (
int i = 1; i <= ifac; i++) {
62 fac[i] = i * fac[i - 1];
74 for (
int i = 3; i <= idf; i++) {
75 df[i] = (i - 1) * df[i - 2];
80 bc_.resize((ibc+1)*(ibc+1));
81 std::fill(bc_.begin(), bc_.end(), Real(0));
84 for(
int i=1; i<=ibc; ++i)
85 bc[i] = bc[i-1] + (ibc+1);
87 for(
int i=0; i<=ibc; i++)
89 for(
int i=0; i<=ibc; i++)
90 for(
int j=1; j<=i; ++j)
91 bc[i][j] = bc[i][j-1] * Real(i-j+1) / Real(j);
94 for (
int i = 0; i < 128; i++) {
95 twoi1[i] = 1.0 / (Real(2.0) * i + Real(1.0));
96 ihalf[i] = Real(i) - Real(0.5);
104 std::vector<Real> fac;
105 std::vector<Real> df;
106 std::vector<Real*> bc;
115 std::vector<Real> bc_;
118 #define _local_min_macro(a,b) ((a) > (b) ? (a) : (b)) 138 template<
typename Real>
142 static Real
eval(Real T,
size_t m, Real absolute_precision) {
144 static const Real T_crit = std::numeric_limits<Real>::is_bounded ==
true ? -log( std::numeric_limits<Real>::min() * 100.5 / 2. ) : Real(0) ;
145 if (std::numeric_limits<Real>::is_bounded && T > T_crit)
146 throw std::overflow_error(
"FmEval_Reference<Real>::eval: Real lacks precision for the given value of argument T");
147 Real denom = (m + 0.5);
148 Real term = 0.5 * exp(-T) / denom;
153 const Real relative_zero = std::numeric_limits<Real>::epsilon();
154 const Real absolute_precision_o_1000 = absolute_precision * 0.001;
158 term = old_term * T / denom;
163 epsilon = _local_min_macro(absolute_precision_o_1000, sum*relative_zero);
164 }
while (term > epsilon || old_term < term);
174 static void eval(Real* Fm, Real T,
size_t mmax, Real absolute_precision) {
179 for(
size_t m=0; m<=mmax; ++m)
180 Fm[m] = eval(T, m, absolute_precision);
204 template<
typename Real>
212 static void eval(Real* Fm, Real t,
size_t mmax, Real absolute_precision) {
218 const Real two_over_sqrt_pi{1.128379167095512573896158903121545171688101258657997713688171443421284936882986828973487320404214727};
219 const Real K = 1.0/two_over_sqrt_pi;
223 auto sqrt_t = sqrt(t);
224 Fm[0] = K*erf(sqrt_t)/sqrt_t;
226 for(
size_t m=0; m<=mmax-1; m++) {
227 Fm[m+1] = ((2*m + 1)*Fm[m] - et)/(t2);
238 template <
typename Real =
double>
241 static const int NGRID = 4096;
242 static const int INTERPOLATION_ORDER = 4;
243 static const bool INTERPOLATION_AND_RECURSION =
false;
247 const Real one_over_delta;
257 delta(T_crit / (NGRID - 1)),
258 one_over_delta(1.0 / delta),
259 mmax(m_max), numbers_(14) {
271 #if LIBINT2_CPLUSPLUS_STD >= 2011 272 static const std::shared_ptr<FmEval_Chebyshev3>& instance(
int m_max,
double = 0.0) {
276 static auto instance_ = std::shared_ptr<FmEval_Chebyshev3>{};
278 const bool need_new_instance = !instance_ || (instance_ && instance_->max_m() < m_max);
279 if (need_new_instance) {
280 auto new_instance = std::make_shared<FmEval_Chebyshev3>(m_max);
281 instance_ = new_instance;
295 inline void eval(Real* Fm, Real x,
int m_max)
const {
300 const double one_over_x = 1.0/x;
301 Fm[0] = 0.88622692545275801365 * sqrt(one_over_x);
305 for (
int i = 1; i <= m_max; i++)
306 Fm[i] = Fm[i - 1] * numbers_.ihalf[i] * one_over_x;
314 const double xd = x * one_over_delta;
315 const int iv = int(xd);
318 const int m_min = INTERPOLATION_AND_RECURSION ? m_max : 0;
320 #if defined(__AVX__) || defined(__SSE2__) 321 const auto x2 = xd*xd;
322 const auto x3 = x2*xd;
323 # if defined (__AVX__) 325 # else // defined(__SSE2__) 329 #endif // SSE2 || AVX 331 const Real *d = c + INTERPOLATION_ORDER * (iv * (mmax+1) + m_min);
335 const int unroll_size = 4;
336 const int m_fence = (m_max + 2 - unroll_size);
337 for(; m<m_fence; m+=unroll_size, d+=INTERPOLATION_ORDER*unroll_size) {
352 const int unroll_size = 2;
353 const int m_fence = (m_max + 2 - unroll_size);
354 for(; m<m_fence; m+=unroll_size, d+=INTERPOLATION_ORDER*unroll_size) {
365 for(; m<=m_max; ++m, d+=INTERPOLATION_ORDER) {
368 Fm[m] = horizontal_add(dvec * xvec);
371 #elif defined(__SSE2__) 373 const int unroll_size = 2;
374 const int m_fence = (m_max + 2 - unroll_size);
375 for(; m<m_fence; m+=unroll_size, d+=INTERPOLATION_ORDER*unroll_size) {
390 for(; m<=m_max; ++m, d+=INTERPOLATION_ORDER) {
394 Fm[m] = horizontal_add(d0vec * x0vec + d1vec * x1vec);
397 #else // not SSE2 nor AVX available 398 for(
int m=m_min; m<=m_max; ++m, d+=INTERPOLATION_ORDER) {
400 + xd * (d[1] + xd * (d[2] + xd * d[3]));
416 if (INTERPOLATION_AND_RECURSION && m_max > 0) {
417 const bool INTERPOLATION_AND_RECURSION_is_slow_on_modern_CPU =
false;
418 assert(INTERPOLATION_AND_RECURSION_is_slow_on_modern_CPU);
419 const Real x2 = 2.0 * x;
420 const Real exp_x = exp(-x);
421 for (
int m = m_max - 1; m >= 0; m--)
422 Fm[m] = (Fm[m + 1] * x2 + exp_x) * numbers_.twoi1[m];
437 void MakeCoeffs(
double a,
double b, Real *cc,
int m) {
439 double f[128], ac[128], Fm[128];
442 double TwoDelta = b - a;
443 double Delta = 0.5 * TwoDelta;
444 double HalfDelta = 0.5 *
Delta;
445 double XXX = a +
Delta;
447 const double absolute_precision = 1e-100;
451 for (k = 0; k <= INTERPOLATION_ORDER + 20; k++) {
459 for (j = 0; j < INTERPOLATION_ORDER; j++) {
463 fac = 2.0 * pow(HalfDelta, (
double) j);
465 for (k = 0; k < 10; k++)
466 sum += f[j + 2 * k] * pow(HalfDelta, (
double) (2 * k)) / numbers_.fac[k]
467 / numbers_.fac[k + j];
471 double arg = -XXX /
Delta;
472 double arg2 = arg * arg;
473 double arg3 = arg2 * arg;
474 auto cc0 = (ac[0] - ac[2]) + (ac[1] - 3.0 * ac[3]) * arg
475 + 2.0 * ac[2] * arg2 + 4.0 * ac[3] * arg3;
477 auto cc1 = (2.0 * ac[1] - 6.0 * ac[3]) + 8.0 * ac[2] * arg
478 + 24.0 * ac[3] * arg2;
479 auto cc2 = 8.0 * ac[2] + 48.0 * ac[3] * arg;
480 auto cc3 = 32.0 * ac[3];
500 if (posix_memalign(&result,
502 (mmax + 1) * NGRID * INTERPOLATION_ORDER *
sizeof(Real)))
503 throw std::bad_alloc();
504 c =
static_cast<Real*
>(result);
507 for (iv = 0; iv < NGRID; iv++) {
508 const auto a = iv * delta;
509 const auto b = a + delta;
512 for (im = 0; im <= mmax; im++) {
513 MakeCoeffs(a, b, c + (iv * (mmax+1) + im) * INTERPOLATION_ORDER, im);
523 template <
typename Real =
double>
526 static const int N = 60;
527 static const int ORDER = 7;
528 static const int ORDERp1 = ORDER+1;
532 const Real one_over_delta;
543 one_over_delta(1.0 / delta),
544 mmax(m_max), numbers_(14) {
556 #if LIBINT2_CPLUSPLUS_STD >= 2011 557 static const std::shared_ptr<FmEval_Chebyshev7>& instance(
int m_max,
double = 0.0) {
561 static auto instance_ = std::shared_ptr<FmEval_Chebyshev7>{};
563 const bool need_new_instance = !instance_ || (instance_ && instance_->max_m() < m_max);
564 if (need_new_instance) {
565 auto new_instance = std::make_shared<FmEval_Chebyshev7>(m_max);
566 instance_ = new_instance;
580 inline void eval(Real* Fm, Real x,
int m_max)
const {
585 const double one_over_x = 1.0/x;
586 Fm[0] = 0.88622692545275801365 * sqrt(one_over_x);
590 for (
int i = 1; i <= m_max; i++)
591 Fm[i] = Fm[i - 1] * numbers_.ihalf[i] * one_over_x;
599 const Real x_over_delta = x * one_over_delta;
600 const int iv = int(x_over_delta);
601 const Real xd = x_over_delta - (Real)iv - 0.5;
604 #if defined(__AVX__) || defined(__SSE2__) 605 const auto x2 = xd*xd;
606 const auto x3 = x2*xd;
607 const auto x4 = x2*x2;
608 const auto x5 = x2*x3;
609 const auto x6 = x3*x3;
610 const auto x7 = x3*x4;
611 # if defined (__AVX__) 614 # else // defined(__SSE2__) 620 #endif // SSE2 || AVX 622 const Real *d = c + (ORDERp1) * (iv * (mmax+1) + m_min);
626 const int unroll_size = 4;
627 const int m_fence = (m_max + 2 - unroll_size);
628 for(; m<m_fence; m+=unroll_size, d+=ORDERp1*unroll_size) {
630 d20v, d21v, d30v, d31v;
644 const int unroll_size = 2;
645 const int m_fence = (m_max + 2 - unroll_size);
646 for(; m<m_fence; m+=unroll_size, d+=ORDERp1*unroll_size) {
659 for(; m<=m_max; ++m, d+=ORDERp1) {
663 Fm[m] = horizontal_add(d0v * x0vec + d1v * x1vec);
666 #elif defined(__SSE2__) && 0 // no SSE yet 668 const int unroll_size = 2;
669 const int m_fence = (m_max + 2 - unroll_size);
670 for(; m<m_fence; m+=unroll_size, d+=ORDERp1*unroll_size) {
685 for(; m<=m_max; ++m, d+=ORDERp1) {
689 Fm[m] = horizontal_add(d0vec * x0vec + d1vec * x1vec);
692 #else // not SSE2 nor AVX available 693 for(
int m=m_min; m<=m_max; ++m, d+=ORDERp1) {
721 #include <libint2/boys_cheb7.h> 723 assert(mmax <= cheb_table_mmax);
726 posix_memalign(&result, ORDERp1*
sizeof(Real), (mmax + 1) * N * ORDERp1 *
sizeof(Real));
727 c =
static_cast<Real*
>(result);
731 for (
int iv = 0; iv < N; ++iv) {
733 std::copy(cheb_table[iv], cheb_table[iv]+(mmax+1)*ORDERp1, c+(iv*(mmax+1))*ORDERp1);
742 const double oon[] = {0.0, 1.0, 1.0/2.0, 1.0/3.0, 1.0/4.0, 1.0/5.0, 1.0/6.0, 1.0/7.0, 1.0/8.0, 1.0/9.0, 1.0/10.0, 1.0/11.0};
751 template<
typename Real =
double,
int INTERPOLATION_ORDER = 7>
754 static const int max_interp_order = 8;
755 static const bool INTERPOLATION_AND_RECURSION =
false;
756 const double soft_zero_;
760 soft_zero_(1e-6), cutoff_(precision), numbers_(
761 INTERPOLATION_ORDER + 1, 2 * (mmax + INTERPOLATION_ORDER - 1)) {
765 const double sqrt_pi = std::sqrt(M_PI);
773 * std::pow(cutoff_ * numbers_.fac[INTERPOLATION_ORDER + 1],
774 1.0 / INTERPOLATION_ORDER);
775 oodelT_ = 1.0 / delT_;
776 max_m_ = mmax + INTERPOLATION_ORDER - 1;
778 T_crit_ =
new Real[max_m_ + 1];
781 for (
int m = max_m_; m >= 0; --m) {
788 double T = -log(cutoff_);
789 const double egamma = cutoff_ * sqrt_pi * numbers_.df[2 * m]
794 const double damping_factor = 0.2;
797 func = std::pow(T, m - 0.5) * std::exp(-T) - egamma;
798 const double dfuncdT = ((m - 0.5) * std::pow(T, m - 1.5)
799 - std::pow(T, m - 0.5)) * std::exp(-T);
805 double deltaT = -func / dfuncdT;
806 const double sign_deltaT = (deltaT > 0.0) ? 1.0 : -1.0;
807 const double max_deltaT = damping_factor * T;
808 if (std::fabs(deltaT) > max_deltaT)
809 deltaT = sign_deltaT * max_deltaT;
815 }
while (std::fabs(func / egamma) >= soft_zero_);
817 const int T_idx = (int) std::floor(T_new / delT_);
818 max_T_ = std::max(max_T_, T_idx);
823 const int nrow = max_T_ + 1;
824 const int ncol = max_m_ + 1;
825 grid_ =
new Real*[nrow];
826 grid_[0] =
new Real[nrow * ncol];
828 for (
int r = 1; r < nrow; ++r)
829 grid_[r] = grid_[r - 1] + ncol;
838 for (
int T_idx = max_T_; T_idx >= 0; --T_idx) {
839 const double T = T_idx * delT_;
851 #if LIBINT2_CPLUSPLUS_STD >= 2011 852 static const std::shared_ptr<FmEval_Taylor>& instance(
unsigned int mmax, Real precision) {
857 static auto instance_ = std::shared_ptr<FmEval_Taylor>{};
859 const bool need_new_instance = !instance_ ||
860 (instance_ && (instance_->max_m() < mmax ||
861 instance_->precision() > precision));
862 if (need_new_instance) {
863 auto new_instance = std::make_shared<FmEval_Taylor>(mmax, precision);
864 instance_ = new_instance;
872 int max_m()
const {
return max_m_ - INTERPOLATION_ORDER + 1; }
881 void eval(Real* Fm, Real T,
int mmax)
const {
882 const double sqrt_pio2 = 1.2533141373155002512;
883 const double two_T = 2.0 * T;
886 const int mmin = INTERPOLATION_AND_RECURSION ? mmax : 0;
890 const bool use_upward_recursion =
true;
891 if (use_upward_recursion) {
893 if (T > T_crit_[0]) {
894 const double one_over_x = 1.0/T;
895 Fm[0] = 0.88622692545275801365 * sqrt(one_over_x);
899 for (
int i = 1; i <= mmax; i++)
900 Fm[i] = Fm[i - 1] * numbers_.ihalf[i] * one_over_x;
906 if (T > T_crit_[mmax]) {
907 double pow_two_T_to_minusjp05 = std::pow(two_T, -mmax - 0.5);
908 for (
int m = mmax; m >= mmin; --m) {
910 Fm[m] = numbers_.df[2 * m] * sqrt_pio2 * pow_two_T_to_minusjp05;
911 pow_two_T_to_minusjp05 *= two_T;
916 const int T_ind = (int) (0.5 + T * oodelT_);
917 const Real h = T_ind * delT_ - T;
918 const Real* F_row = grid_[T_ind] + mmin;
920 #if defined (__AVX__) 922 if (INTERPOLATION_ORDER == 3 || INTERPOLATION_ORDER == 7) {
923 const double h2 = h*h*oon[2];
924 const double h3 = h2*h*oon[3];
926 if (INTERPOLATION_ORDER == 7) {
927 const double h4 = h3*h*oon[4];
928 const double h5 = h4*h*oon[5];
929 const double h6 = h5*h*oon[6];
930 const double h7 = h6*h*oon[7];
940 const int unroll_size = 2;
941 const int m_fence = (mmax + 2 - unroll_size);
942 for(; m<m_fence; m+=unroll_size, F_row+=unroll_size) {
945 if (INTERPOLATION_ORDER == 3 || INTERPOLATION_ORDER == 7) {
949 if (INTERPOLATION_ORDER == 7) {
952 fm01 += horizontal_add(fr0_4567*h4567, fr1_4567*h4567);
958 Real total0 = 0.0, total1 = 0.0;
959 for(
int i=INTERPOLATION_ORDER; i>=1; --i) {
960 total0 = oon[i]*h*(F_row[i] + total0);
961 total1 = oon[i]*h*(F_row[i+1] + total1);
963 Fm[m] = F_row[0] + total0;
964 Fm[m+1] = F_row[1] + total1;
972 if (INTERPOLATION_ORDER == 3 || INTERPOLATION_ORDER == 7) {
974 if (INTERPOLATION_ORDER == 7) {
978 Fm[m] = horizontal_add(fr0123*h0123 + fr4567*h4567);
981 Fm[m] = horizontal_add(fr0123*h0123);
987 for(
int i=INTERPOLATION_ORDER; i>=1; --i) {
988 total = oon[i]*h*(F_row[i] + total);
990 Fm[m] = F_row[0] + total;
1010 if (INTERPOLATION_AND_RECURSION && mmin > 0) {
1011 const Real exp_mT = std::exp(-T);
1012 for (
int m = mmin - 1; m >= 0; --m) {
1013 Fm[m] = (exp_mT + two_T * Fm[m+1]) * numbers_.twoi1[m];
1043 static double truncation_error(
unsigned int m,
double T) {
1044 const double m2= m * m;
1045 const double m3= m2 * m;
1046 const double m4= m2 * m2;
1047 const double T2= T * T;
1048 const double T3= T2 * T;
1049 const double T4= T2 * T2;
1050 const double T5= T2 * T3;
1052 const double result = exp(-T) * (105 + 16*m4 + 16*m3*(T - 8) - 30*T + 12*T2
1053 - 8*T3 + 16*T4 + 8*m2*(43 - 9*T + 2*T2) +
1054 4*m*(-88 + 23*T - 8*T2 + 4*T3))/(32*T5);
1065 static double truncation_error(
double T) {
1066 const double result = exp(-T) /(2*T);
1081 template<
typename Real>
1084 static const int mmin = -1;
1088 mmax_(mmax), precision_(precision),
1093 unsigned int max_m()
const {
return mmax; }
1098 void eval_yukawa(Real* Gm, Real T, Real U,
size_t mmax, Real absolute_precision) {
1102 void eval_slater(Real* Gm, Real T, Real U,
size_t mmax, Real absolute_precision) {
1112 const Real sqrt_U = sqrt(U);
1113 const Real sqrt_T = sqrt(T);
1114 const Real oo_sqrt_T = 1 / sqrt_T;
1115 const Real oo_sqrt_U = 1 / sqrt_U;
1116 const Real exp_mT = exp(-T);
1117 const Real kappa = sqrt_U - sqrt_T;
1118 const Real lambda = sqrt_U + sqrt_T;
1119 const Real sqrtPi_over_4(0.44311346272637900682454187083528629569938736403060);
1120 const Real pfac = sqrtPi_over_4 * exp_mT;
1121 const Real erfc_k = exp(kappa*kappa) * (1 - erf(kappa));
1122 const Real erfc_l = exp(lambda*lambda) * (1 - erf(lambda));
1124 Gm[0] = pfac * (erfc_k + erfc_l) * oo_sqrt_U;
1125 Gm[1] = pfac * (erfc_k - erfc_l) * oo_sqrt_T;
1129 const Real oo_two_T = 0.5 / T;
1130 const Real two_U = 2.0 * U;
1132 for(
unsigned int m=1, two_m_minus_1=1; m<=mmax; ++m, two_m_minus_1+=2) {
1133 Gm[m+1] = oo_two_T * ( two_m_minus_1 * Gm[m] + two_U * Gm[m-1] - exp_mT);
1149 const int expansion_order = 60;
1150 eval_yukawa_Gm0U(Gm_0_U_, U, mmax - 1 + expansion_order);
1157 const Real one_over_twoU = 0.5 / U;
1158 const Real one_over_twoU = 2.0 * T;
1159 const Real exp_mT = exp(-T);
1160 for(
int m=mmax-2; m>=-1; --m)
1161 Gm[m] = one_over_twoU (exp_mT - numbers_.twoi1[m+1] * Gm[m+1] + twoT Gm[m+2])
1164 std::copy(Gm_0_U_.begin()+1, Gm_0_U_.begin()+mmax+2, Gm);
1182 std::copy(Gm_0_U_.begin()+1, Gm_0_U_.begin()+mmax+2, Gm);
1209 const Real sqrt_U = sqrt(U);
1210 const Real exp_U = exp(U);
1211 const Real oo_sqrt_U = 1 / sqrt_U;
1212 const Real sqrtPi_over_2(
1213 0.88622692545275801364908374167057259139877472806119);
1214 const Real pfac = sqrtPi_over_2 * exp_U;
1215 const Real erfc_sqrt_U = 1.0 - erf(sqrt_U);
1216 Gm_0_U_[0] = pfac * exp_U * oo_sqrt_U * erfc_sqrt_U;
1224 mstar = std::min((
size_t)U,(
size_t)mmax);
1225 const bool implemented =
false;
1226 assert(implemented ==
true);
1230 const Real two_U = 2.0 * U;
1233 for(
int m=mstar+1; m<=mmax; ++m) {
1234 Gm_0_U_[m+1] = numbers_.twoi1[m] * (1.0 - two_U * Gm_0_U_[m]);
1240 const Real one_over_U = 2.0 / two_U;
1241 for(
int m=mstar-1; m>=mmin; --m) {
1242 Gm_0_U_[m+1] = one_over_U * ( 0.5 - numbers_.ihalf[m+2] * Gm_0_U_[m+2]);
1248 std::copy(Gm_0_U_.begin()+1, Gm_0_U_.begin()+mmax+2, Gm0U);
1256 const Real sqrt_U = sqrt(U);
1257 const Real sqrt_T = sqrt(T);
1258 const Real exp_mT = exp(-T);
1259 const Real kappa = sqrt_U - sqrt_T;
1260 const Real lambda = sqrt_U + sqrt_T;
1261 const Real sqrtPi_over_4(0.44311346272637900682454187083528629569938736403060);
1262 const Real result = sqrtPi_over_4 * exp_mT *
1263 (exp(kappa*kappa) * (1 - erf(kappa)) + exp(lambda*lambda) * (1 - erf(lambda))) / sqrt_U;
1268 const Real sqrt_U = sqrt(U);
1269 const Real sqrt_T = sqrt(T);
1270 const Real exp_mT = exp(-T);
1271 const Real kappa = sqrt_U - sqrt_T;
1272 const Real lambda = sqrt_U + sqrt_T;
1273 const Real sqrtPi_over_4(0.44311346272637900682454187083528629569938736403060);
1274 const Real result = sqrtPi_over_4 * exp_mT *
1275 (exp(kappa*kappa) * (1 - erf(kappa)) - exp(lambda*lambda) * (1 - erf(lambda))) / sqrt_T;
1281 const Real sqrt_U = sqrt(U);
1282 const Real sqrt_T = sqrt(T);
1283 const Real oo_sqrt_U = 1 / sqrt_U;
1284 const Real oo_sqrt_T = 1 / sqrt_T;
1285 const Real exp_mT = exp(-T);
1286 const Real kappa = sqrt_U - sqrt_T;
1287 const Real lambda = sqrt_U + sqrt_T;
1288 const Real sqrtPi_over_4(0.44311346272637900682454187083528629569938736403060);
1289 const Real pfac = sqrtPi_over_4 * exp_mT;
1290 const Real erfc_k = exp(kappa*kappa) * (1 - erf(kappa));
1291 const Real erfc_l = exp(lambda*lambda) * (1 - erf(lambda));
1292 result[0] = pfac * (erfc_k + erfc_l) * oo_sqrt_U;
1293 result[1] = pfac * (erfc_k - erfc_l) * oo_sqrt_T;
1303 std::vector<Real> Gm_0_U_;
1313 size_t count_tenno_algorithm_branches[3];
1319 template<
typename Real,
int k>
1330 template <
typename Real>
1332 std::vector<Real> Fm_;
1333 std::vector<Real> g_i;
1334 std::vector<Real> r_i;
1335 std::vector<Real> oorhog_i;
1341 void init(
int mmax) {
1345 oorhog_i.resize(mmax+1);
1377 template<
typename Real,
int k>
1386 precision_(precision), fm_eval_(),
1387 numbers_(-1,-1,mmax) {
1388 assert(k == -1 || k == 0 || k == 2);
1400 #if LIBINT2_CPLUSPLUS_STD >= 2011 1401 static const std::shared_ptr<GaussianGmEval>& instance(
unsigned int mmax, Real precision) {
1406 static auto instance_ = std::shared_ptr<GaussianGmEval>{};
1408 const bool need_new_instance = !instance_ ||
1409 (instance_ && (instance_->max_m() < mmax ||
1410 instance_->precision() > precision));
1411 if (need_new_instance) {
1412 auto new_instance = std::make_shared<GaussianGmEval>(mmax, precision);
1413 instance_ = new_instance;
1421 int max_m()
const {
return mmax_; }
1423 Real precision()
const {
return precision_; }
1439 template <
typename AnyReal>
1440 void eval(Real* Gm, Real rho, Real T,
size_t mmax,
1441 const std::vector<std::pair<AnyReal, AnyReal> >& geminal,
1444 std::fill(Gm, Gm+mmax+1, Real(0));
1446 const double sqrt_rho = sqrt(rho);
1447 const double oo_sqrt_rho = 1.0/sqrt_rho;
1449 void* _scr = (scr == 0) ?
this : scr;
1451 for(
int i=1; i<=mmax; i++) {
1452 scratch.r_i[i] = scratch.r_i[i-1] * rho;
1456 typedef typename std::vector<std::pair<AnyReal, AnyReal> >::const_iterator citer;
1457 const citer gend = geminal.end();
1458 for(citer i=geminal.begin(); i!= gend; ++i) {
1460 const double gamma = i->first;
1461 const double gcoef = i->second;
1462 const double rhog = rho + gamma;
1463 const double oorhog = 1.0/rhog;
1465 const double gorg = gamma * oorhog;
1466 const double rorg = rho * oorhog;
1467 const double sqrt_rho_org = sqrt_rho * oorhog;
1468 const double sqrt_rhog = sqrt(rhog);
1469 const double sqrt_rorg = sqrt_rho_org * sqrt_rhog;
1472 const Real const_SQRTPI_2(0.88622692545275801364908374167057259139877472806119);
1473 const double SS_K0G12_SS = gcoef * oo_sqrt_rho * const_SQRTPI_2 * rorg * sqrt_rorg * exp(-gorg*T);
1476 void* _scr = (scr == 0) ?
this : scr;
1479 const double rorgT = rorg * T;
1480 fm_eval_->eval(&scratch.Fm_[0], rorgT, mmax);
1483 const Real const_2_SQRTPI(1.12837916709551257389615890312154517);
1484 const Real pfac = const_2_SQRTPI * sqrt_rhog * SS_K0G12_SS;
1485 scratch.oorhog_i[0] = pfac;
1486 for(
int i=1; i<=mmax; i++) {
1487 scratch.g_i[i] = scratch.g_i[i-1] * gamma;
1488 scratch.oorhog_i[i] = scratch.oorhog_i[i-1] * oorhog;
1490 for(
int m=0; m<=mmax; m++) {
1492 Real* bcm = numbers_.bc[m];
1493 for(
int n=0; n<=m; n++) {
1494 ssss += bcm[n] * scratch.r_i[n] * scratch.g_i[m-n] * scratch.Fm_[n];
1496 Gm[m] += ssss * scratch.oorhog_i[m];
1502 double ss_oper_ss_m = SS_K0G12_SS;
1503 Gm[0] += ss_oper_ss_m;
1504 for(
int m=1; m<=mmax; ++m) {
1505 ss_oper_ss_m *= gorg;
1506 Gm[m] += ss_oper_ss_m;
1513 const double rorgT = rorg * T;
1514 const double SS_K2G12_SS_0 = (1.5 + rorgT) * (SS_K0G12_SS * oorhog);
1515 const double SS_K2G12_SS_m1 = rorg * (SS_K0G12_SS * oorhog);
1517 double SS_K2G12_SS_gorg_m = SS_K2G12_SS_0 ;
1518 double SS_K2G12_SS_gorg_m1 = SS_K2G12_SS_m1;
1519 Gm[0] += SS_K2G12_SS_gorg_m;
1520 for(
int m=1; m<=mmax; ++m) {
1521 SS_K2G12_SS_gorg_m *= gorg;
1522 Gm[m] += SS_K2G12_SS_gorg_m - m * SS_K2G12_SS_gorg_m1;
1523 SS_K2G12_SS_gorg_m1 *= gorg;
1534 std::shared_ptr<FmEval_Taylor<Real>> fm_eval_;
1539 template <
typename GmEvalFunction>
1540 struct GenericGmEval {
1542 GenericGmEval(
unsigned int mmax,
double precision) : mmax_(mmax), precision_(precision) {}
1544 static std::shared_ptr<GenericGmEval> instance(
unsigned int mmax,
double precision = 0.0) {
1545 return std::make_shared<GenericGmEval>(mmax, precision);
1548 template <
typename Real,
typename... ExtraArgs>
1549 void eval(Real* Gm, Real rho, Real T,
size_t mmax, ExtraArgs... args) {
1550 assert(mmax <= mmax_);
1551 GmEvalFunction eval_;
1552 eval_(Gm, rho, T, mmax, std::forward(args)...);
1569 template <
typename Real>
1573 return -std::exp(-zeta*x)/zeta;
1576 template <
typename Real>
1578 fngtg(
const std::vector<Real>& cc,
1579 const std::vector<Real>& aa,
1582 const Real x2 = x * x;
1583 const unsigned int n = cc.size();
1584 for(
unsigned int i=0; i<n; ++i)
1585 value += cc[i] * std::exp(- aa[i] * x2);
1592 template <
typename Real>
1594 wwtewklopper(Real x) {
1595 const Real x2 = x * x;
1596 return x2 * std::exp(-2 * x2);
1598 template <
typename Real>
1601 const Real x2 = x * x;
1602 const Real x6 = x2 * x2 * x2;
1603 return std::exp(-0.005 * x6);
1606 template <
typename Real>
1613 template <
typename Real>
1615 norm(
const std::vector<Real>& vec) {
1617 const unsigned int n = vec.size();
1618 for(
unsigned int i=0; i<n; ++i)
1619 value += vec[i] * vec[i];
1623 template <
typename Real>
1624 void LinearSolveDamped(
const std::vector<Real>& A,
1625 const std::vector<Real>& b,
1627 std::vector<Real>& x) {
1628 const size_t n = b.size();
1629 std::vector<Real> Acopy(A);
1630 for(
size_t m=0; m<n; ++m) Acopy[m*n + m] *= (1 + lambda);
1631 std::vector<Real> e(b);
1635 std::vector<int> ipiv(n);
1639 dgesv_(&n, &one, &Acopy[0], &n, &ipiv[0], &e[0], &n, &info);
1656 template <
typename Real>
1657 void stg_ng_fit(
unsigned int n,
1659 std::vector< std::pair<Real, Real> >& geminal,
1662 unsigned int npts = 1001) {
1665 std::vector<Real> cc(n, 1.0);
1666 std::vector<Real> aa(n);
1667 for(
unsigned int i=0; i<n; ++i)
1668 aa[i] = std::pow(3.0, (i + 2 - (n + 1)/2.0));
1671 Real ffnormfac = 0.0;
1672 for(
unsigned int i=0; i<n; ++i)
1673 for(
unsigned int j=0; j<n; ++j)
1674 ffnormfac += cc[i] * cc[j]/std::sqrt(aa[i] + aa[j]);
1675 const Real Nf = std::sqrt(2.0 * zeta) * zeta;
1676 const Real Nff = std::sqrt(2.0) / (std::sqrt(ffnormfac) *
1677 std::sqrt(std::sqrt(M_PI)));
1678 for(
unsigned int i=0; i<n; ++i) cc[i] *= -Nff/Nf;
1680 Real lambda0 = 1000;
1681 const Real nu = 3.0;
1682 const Real epsilon = 1e-15;
1683 const unsigned int maxniter = 200;
1686 std::vector<Real> xi(npts);
1687 for(
unsigned int i=0; i<npts; ++i) xi[i] = xmin + (xmax - xmin)*i/(npts - 1);
1689 std::vector<Real> err(npts);
1691 const size_t nparams = 2*n;
1692 std::vector<Real> J( npts * nparams );
1693 std::vector<Real> delta(nparams);
1700 Real errnormIm1 = 1e3;
1701 bool converged =
false;
1702 unsigned int iter = 0;
1703 while (!converged && iter < maxniter) {
1706 for(
unsigned int i=0; i<npts; ++i) {
1707 const Real x = xi[i];
1708 err[i] = (fstg(zeta, x) - fngtg(cc, aa, x)) * std::sqrt(ww(x));
1710 errnormI = norm(err)/std::sqrt((Real)npts);
1713 converged = std::abs((errnormI - errnormIm1)/errnormIm1) <= epsilon;
1714 if (converged)
break;
1715 errnormIm1 = errnormI;
1717 for(
unsigned int i=0; i<npts; ++i) {
1718 const Real x2 = xi[i] * xi[i];
1719 const Real sqrt_ww_x = std::sqrt(ww(xi[i]));
1720 const unsigned int ioffset = i * nparams;
1721 for(
unsigned int j=0; j<n; ++j)
1722 J[ioffset+j] = (std::exp(-aa[j] * x2)) * sqrt_ww_x;
1723 const unsigned int ioffsetn = ioffset+n;
1724 for(
unsigned int j=0; j<n; ++j)
1725 J[ioffsetn+j] = - sqrt_ww_x * x2 * cc[j] * std::exp(-aa[j] * x2);
1728 std::vector<Real> A( nparams * nparams);
1729 for(
size_t r=0, rc=0; r<nparams; ++r) {
1730 for(
size_t c=0; c<nparams; ++c, ++rc) {
1732 for(
size_t i=0, ir=r, ic=c; i<npts; ++i, ir+=nparams, ic+=nparams)
1733 Arc += J[ir] * J[ic];
1738 std::vector<Real> b( nparams );
1739 for(
size_t r=0; r<nparams; ++r) {
1741 for(
size_t i=0, ir=r; i<npts; ++i, ir+=nparams)
1742 br += J[ir] * err[i];
1749 for(
int l=-1; l<1000; ++l) {
1751 LinearSolveDamped(A, b, lambda0, delta );
1753 std::vector<double> cc_0(cc);
for(
unsigned int i=0; i<n; ++i) cc_0[i] += delta[i];
1754 std::vector<double> aa_0(aa);
for(
unsigned int i=0; i<n; ++i) aa_0[i] += delta[i+n];
1757 bool step_too_large =
false;
1758 for(
unsigned int i=0; i<n; ++i)
1759 if (aa_0[i] < 0.0) {
1760 step_too_large =
true;
1763 if (!step_too_large) {
1764 std::vector<double> err_0(npts);
1765 for(
unsigned int i=0; i<npts; ++i) {
1766 const double x = xi[i];
1767 err_0[i] = (fstg(zeta, x) - fngtg(cc_0, aa_0, x)) * std::sqrt(ww(x));
1769 const double errnorm_0 = norm(err_0)/std::sqrt((
double)npts);
1770 if (errnorm_0 < errnormI) {
1785 assert(not (iter == maxniter && errnormI > 1e-10));
1787 for(
unsigned int i=0; i<n; ++i)
1788 geminal[i] = std::make_pair(aa[i], cc[i]);
1795 #endif // header guard holds tables of expensive quantities
Definition: boys.h:55
FmEval_Chebyshev3(int m_max, double=0.0)
Definition: boys.h:255
int max_m() const
Definition: boys.h:289
static const std::shared_ptr< FmEval_Taylor > & instance(unsigned int mmax, Real precision)
Singleton interface allows to manage the lone instance; adjusts max m and precision values as needed ...
Definition: boys.h:854
FmEval_Chebyshev7(int m_max, double=0.0)
Definition: boys.h:540
SIMD vector of 4 double-precision floating-point real numbers, operations on which use AVX instructio...
Definition: vector_x86.h:639
void eval(Real *Fm, Real x, int m_max) const
fills in Fm with computed Boys function values for m in [0,mmax]
Definition: boys.h:580
Defaults definitions for various parameters assumed by Libint.
Definition: algebra.cc:23
void eval(Real *Fm, Real T, int mmax) const
computes Boys function values with m index in range [0,mmax]
Definition: boys.h:881
static void eval(Real *Fm, Real T, size_t mmax, Real absolute_precision)
fills up an array of Fm(T) for m in [0,mmax]
Definition: boys.h:174
static void eval_G_m1_0(Real *result, Real T, Real U)
computes and , both are needed for Yukawa and Slater integrals
Definition: boys.h:1280
some evaluators need thread-local scratch, but most don't
Definition: boys.h:1325
Computes the Boys function, $ F_m (T) = ^1 u^{2m} (-T u^2) \, { d}u $, using multi-algorithm approach...
Definition: boys.h:205
Real precision() const
Definition: boys.h:874
int max_m() const
Definition: boys.h:872
static Real eval_G0(Real T, Real U)
computes a single value of G_0(T,U)
Definition: boys.h:1267
static void eval_yukawa_s1(Real *Gm, Real T, Real U, size_t mmax)
Scheme 1 of Ten-no: upward recursion from and T must be non-zero!
Definition: boys.h:1109
void load(T const *a)
loads a to this
Definition: vector_x86.h:720
FmEval_Taylor(unsigned int mmax, Real precision)
Constructs the object to be able to compute Boys funcion for m in [0,mmax], with relative precision...
Definition: boys.h:759
core integral for Yukawa and exponential interactions
Definition: boys.h:1082
Computes the Boys function, $ F_m (T) = ^1 u^{2m} (-T u^2) \, { d}u $, using 7-th order Chebyshev int...
Definition: boys.h:524
void eval_yukawa_s2(Real *Gm, Real T, Real U, size_t mmax)
Scheme 2 of Ten-no:
Definition: boys.h:1146
void load_aligned(T const *a)
loads a to this
Definition: vector_x86.h:725
Computes the Boys function, $ F_m (T) = ^1 u^{2m} (-T u^2) \, { d}u $, using 3-rd order Chebyshev int...
Definition: boys.h:239
Computes the Boys function, , using single algorithm (asymptotic expansion).
Definition: boys.h:139
void eval_yukawa_Gm0U(Real *Gm0U, Real U, int mmax, int mmin=-1)
computes prerequisites for MacLaurin expansion of Gm(T,U) for m in [-1,mmax); uses Ten-no's prescript...
Definition: boys.h:1197
void eval(Real *Fm, Real x, int m_max) const
fills in Fm with computed Boys function values for m in [0,mmax]
Definition: boys.h:295
void convert(T *a) const
writes this to a
Definition: vector_x86.h:729
int max_m() const
Definition: boys.h:574
static Real eval(Real T, size_t m, Real absolute_precision)
computes a single value of using MacLaurin series.
Definition: boys.h:142
static Real eval_Gm1(Real T, Real U)
computes a single value of G_{-1}(T,U)
Definition: boys.h:1255
static Real eval_MacLaurinT(Real T, Real U, size_t m, Real absolute_precision)
computes a single value of G(T,U) using MacLaurin series.
Definition: boys.h:1297
void eval_yukawa_s3(Real *Gm, Real T, Real U, size_t mmax)
Scheme 3 of Ten-no:
Definition: boys.h:1174
Computes the Boys function, $ F_m (T) = ^1 u^{2m} (-T u^2) \, { d}u $, using Taylor interpolation of ...
Definition: boys.h:752
Real precision() const
Definition: boys.h:1095
static void eval(Real *Fm, Real t, size_t mmax, Real absolute_precision)
fills up an array of Fm(T) for m in [0,mmax]
Definition: boys.h:212