00001
00013
00031
00037 #include <bpm/bpm_messages.h>
00038 #include <bpm/bpm_nr.h>
00039
00040 static int nr_lmLUinverse(double *A, double *B, int m);
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053 void nr_trans_mat_mat_mult(double *a, double *b, int n, int m ) {
00054
00055 register int i, j, k, jj, kk;
00056 register double sum, *bim, *akm;
00057 const int bsize=__LM_BLOCKSZ__;
00058
00059 #define __MIN__(x, y) (((x)<=(y))? (x) : (y))
00060 #define __MAX__(x, y) (((x)>=(y))? (x) : (y))
00061
00062
00063 for(jj=0; jj<m; jj+=bsize){
00064 for(i=0; i<m; ++i){
00065 bim=b+i*m;
00066 for(j=__MAX__(jj, i); j<__MIN__(jj+bsize, m); ++j)
00067 bim[j]=0.0;
00068 }
00069
00070 for(kk=0; kk<n; kk+=bsize){
00071 for(i=0; i<m; ++i){
00072 bim=b+i*m;
00073 for(j=__MAX__(jj, i); j<__MIN__(jj+bsize, m); ++j){
00074 sum=0.0;
00075 for(k=kk; k<__MIN__(kk+bsize, n); ++k){
00076 akm=a+k*m;
00077 sum+=akm[i]*akm[j];
00078 }
00079 bim[j]+=sum;
00080 }
00081 }
00082 }
00083 }
00084
00085
00086 for(i=0; i<m; ++i)
00087 for(j=0; j<i; ++j)
00088 b[i*m+j]=b[j*m+i];
00089
00090 #undef __MIN__
00091 #undef __MAX__
00092
00093 return;
00094 }
00095
00096
00097
00098
00099 void nr_fdif_forw_jac_approx(
00100 void (*func)(double *p, double *hx, int m, int n, void *adata),
00101
00102 double *p,
00103 double *hx,
00104 double *hxx,
00105 double delta,
00106 double *jac,
00107 int m,
00108 int n,
00109 void *adata )
00110 {
00111 register int i, j;
00112 double tmp;
00113 register double d;
00114
00115 for(j=0; j<m; ++j){
00116
00117 d=CNST(1E-04)*p[j];
00118 d=FABS(d);
00119 if(d<delta)
00120 d=delta;
00121
00122 tmp=p[j];
00123 p[j]+=d;
00124
00125 (*func)(p, hxx, m, n, adata);
00126
00127 p[j]=tmp;
00128
00129 d=CNST(1.0)/d;
00130 for(i=0; i<n; ++i){
00131 jac[i*m+j]=(hxx[i]-hx[i])*d;
00132 }
00133 }
00134
00135 return;
00136 }
00137
00138
00139
00140
00141 void nr_fdif_cent_jac_approx(
00142 void (*func)(double *p, double *hx, int m, int n, void *adata),
00143
00144 double *p,
00145 double *hxm,
00146 double *hxp,
00147 double delta,
00148 double *jac,
00149 int m,
00150 int n,
00151 void *adata )
00152 {
00153 register int i, j;
00154 double tmp;
00155 register double d;
00156
00157 for(j=0; j<m; ++j){
00158
00159 d=CNST(1E-04)*p[j];
00160 d=FABS(d);
00161 if(d<delta)
00162 d=delta;
00163
00164 tmp=p[j];
00165 p[j]-=d;
00166 (*func)(p, hxm, m, n, adata);
00167
00168 p[j]=tmp+d;
00169 (*func)(p, hxp, m, n, adata);
00170 p[j]=tmp;
00171
00172 d=CNST(0.5)/d;
00173 for(i=0; i<n; ++i){
00174 jac[i*m+j]=(hxp[i]-hxm[i])*d;
00175 }
00176 }
00177
00178 return;
00179 }
00180
00181
00182
00183
00184
00185
00186
00187
00188
00189
00190
00191
00192
00193
00194
00195
00196
00197
00198
00199
00200
00201
00202
00203
00204
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220 void nr_lmchkjac(
00221 void (*func)(double *p, double *hx, int m, int n, void *adata),
00222 void (*jacf)(double *p, double *j, int m, int n, void *adata),
00223 double *p, int m, int n, void *adata, double *err )
00224 {
00225 double factor=CNST(100.0);
00226 double one=CNST(1.0);
00227 double zero=CNST(0.0);
00228 double *fvec, *fjac, *pp, *fvecp, *buf;
00229
00230 register int i, j;
00231 double eps, epsf, temp, epsmch;
00232 double epslog;
00233 int fvec_sz=n, fjac_sz=n*m, pp_sz=m, fvecp_sz=n;
00234
00235 epsmch=DBL_EPSILON;
00236 eps=(double)sqrt(epsmch);
00237
00238 buf=(double *)malloc((fvec_sz + fjac_sz + pp_sz + fvecp_sz)*sizeof(double));
00239 if(!buf){
00240 bpm_error( "memory allocation request failed in nr_lmchkjac(...)",
00241 __FILE__, __LINE__ );
00242 exit( 1 );
00243 }
00244 fvec=buf;
00245 fjac=fvec+fvec_sz;
00246 pp=fjac+fjac_sz;
00247 fvecp=pp+pp_sz;
00248
00249
00250 (*func)(p, fvec, m, n, adata);
00251
00252
00253 (*jacf)(p, fjac, m, n, adata);
00254
00255
00256 for(j=0; j<m; ++j){
00257 temp=eps*FABS(p[j]);
00258 if(temp==zero) temp=eps;
00259 pp[j]=p[j]+temp;
00260 }
00261
00262
00263 (*func)(pp, fvecp, m, n, adata);
00264
00265 epsf=factor*epsmch;
00266 epslog=(double)log10(eps);
00267
00268 for(i=0; i<n; ++i)
00269 err[i]=zero;
00270
00271 for(j=0; j<m; ++j){
00272 temp=FABS(p[j]);
00273 if(temp==zero) temp=one;
00274
00275 for(i=0; i<n; ++i)
00276 err[i]+=temp*fjac[i*m+j];
00277 }
00278
00279 for(i=0; i<n; ++i){
00280 temp=one;
00281 if(fvec[i]!=zero && fvecp[i]!=zero && FABS(fvecp[i]-fvec[i])>=epsf*FABS(fvec[i]))
00282 temp=eps*FABS((fvecp[i]-fvec[i])/eps - err[i])/(FABS(fvec[i])+FABS(fvecp[i]));
00283 err[i]=one;
00284 if(temp>epsmch && temp<eps)
00285 err[i]=((double)log10(temp) - epslog)/epslog;
00286 if(temp>=eps) err[i]=zero;
00287 }
00288
00289 free(buf);
00290
00291 return;
00292 }
00293
00294
00295
00296
00297
00298
00299
00300
00301
00302
00303
00304
00305
00306
00307
00308 static int nr_lmLUinverse(double *A, double *B, int m) {
00309 void *buf=NULL;
00310 int buf_sz=0;
00311
00312 register int i, j, k, l;
00313 int *idx, maxi=-1, idx_sz, a_sz, x_sz, work_sz, tot_sz;
00314 double *a, *x, *work, max, sum, tmp;
00315
00316
00317 idx_sz=m;
00318 a_sz=m*m;
00319 x_sz=m;
00320 work_sz=m;
00321 tot_sz=idx_sz*sizeof(int) + (a_sz+x_sz+work_sz)*sizeof(double);
00322
00323 buf_sz=tot_sz;
00324 buf=(void *)malloc(tot_sz);
00325 if(!buf){
00326 bpm_error( "memory allocation request failed in nr_lmLUinverse(...)",
00327 __FILE__, __LINE__ );
00328 exit(1);
00329 }
00330
00331 idx=(int *)buf;
00332 a=(double *)(idx + idx_sz);
00333 x=a + a_sz;
00334 work=x + x_sz;
00335
00336
00337 for(i=0; i<a_sz; ++i) a[i]=A[i];
00338
00339
00340
00341 for(i=0; i<m; ++i){
00342 max=0.0;
00343 for(j=0; j<m; ++j)
00344 if((tmp=FABS(a[i*m+j]))>max)
00345 max=tmp;
00346 if(max==0.0){
00347 bpm_error( "Singular matrix A in nr_lmLUinverse(...)",
00348 __FILE__, __LINE__ );
00349 free(buf);
00350
00351 return 0;
00352 }
00353 work[i]=CNST(1.0)/max;
00354 }
00355
00356 for(j=0; j<m; ++j){
00357 for(i=0; i<j; ++i){
00358 sum=a[i*m+j];
00359 for(k=0; k<i; ++k)
00360 sum-=a[i*m+k]*a[k*m+j];
00361 a[i*m+j]=sum;
00362 }
00363 max=0.0;
00364 for(i=j; i<m; ++i){
00365 sum=a[i*m+j];
00366 for(k=0; k<j; ++k)
00367 sum-=a[i*m+k]*a[k*m+j];
00368 a[i*m+j]=sum;
00369 if((tmp=work[i]*FABS(sum))>=max){
00370 max=tmp;
00371 maxi=i;
00372 }
00373 }
00374 if(j!=maxi){
00375 for(k=0; k<m; ++k){
00376 tmp=a[maxi*m+k];
00377 a[maxi*m+k]=a[j*m+k];
00378 a[j*m+k]=tmp;
00379 }
00380 work[maxi]=work[j];
00381 }
00382 idx[j]=maxi;
00383 if(a[j*m+j]==0.0)
00384 a[j*m+j]=DBL_EPSILON;
00385 if(j!=m-1){
00386 tmp=CNST(1.0)/(a[j*m+j]);
00387 for(i=j+1; i<m; ++i)
00388 a[i*m+j]*=tmp;
00389 }
00390 }
00391
00392
00393
00394
00395 for(l=0; l<m; ++l){
00396 for(i=0; i<m; ++i) x[i]=0.0;
00397 x[l]=CNST(1.0);
00398
00399 for(i=k=0; i<m; ++i){
00400 j=idx[i];
00401 sum=x[j];
00402 x[j]=x[i];
00403 if(k!=0)
00404 for(j=k-1; j<i; ++j)
00405 sum-=a[i*m+j]*x[j];
00406 else
00407 if(sum!=0.0)
00408 k=i+1;
00409 x[i]=sum;
00410 }
00411
00412 for(i=m-1; i>=0; --i){
00413 sum=x[i];
00414 for(j=i+1; j<m; ++j)
00415 sum-=a[i*m+j]*x[j];
00416 x[i]=sum/a[i*m+i];
00417 }
00418
00419 for(i=0; i<m; ++i)
00420 B[i*m+l]=x[i];
00421 }
00422
00423 free(buf);
00424
00425 return 1;
00426 }
00427
00428
00429
00430
00431
00432
00433
00434
00435
00436
00437
00438
00439
00440
00441
00442
00443
00444
00445
00446
00447
00448
00449 int nr_lmcovar(double *JtJ, double *C, double sumsq, int m, int n) {
00450 register int i;
00451 int rnk;
00452 double fact;
00453
00454 rnk=nr_lmLUinverse(JtJ, C, m);
00455 if(!rnk) return 0;
00456
00457 rnk=m;
00458
00459 fact=sumsq/(double)(n-rnk);
00460 for(i=0; i<m*m; ++i)
00461 C[i]*=fact;
00462
00463 return rnk;
00464 }
00465
00466
00467
00468 int nr_lmder(
00469 void (*func)(double *p, double *hx, int m, int n, void *adata),
00470 void (*jacf)(double *p, double *j, int m, int n, void *adata),
00471 double *p,
00472 double *x,
00473 int m,
00474 int n,
00475 int itmax,
00476 double opts[4],
00477
00478
00479 double info[LM_INFO_SZ],
00480
00481
00482
00483
00484
00485
00486
00487
00488
00489
00490
00491
00492
00493 double *work,
00494 double *covar,
00495 void *adata)
00496
00497
00498 {
00499 register int i, j, k, l;
00500 int worksz, freework=0, issolved;
00501
00502 double *e,
00503 *hx,
00504 *jacTe,
00505 *jac,
00506 *jacTjac,
00507 *Dp,
00508 *diag_jacTjac,
00509 *pDp;
00510
00511 register double mu,
00512 tmp;
00513 double p_eL2, jacTe_inf, pDp_eL2;
00514 double p_L2, Dp_L2=DBL_MAX, dF, dL;
00515 double tau, eps1, eps2, eps2_sq, eps3;
00516 double init_p_eL2;
00517 int nu=2, nu2, stop, nfev, njev=0;
00518 const int nm=n*m;
00519
00520 mu=jacTe_inf=0.0;
00521 if(n<m){
00522 bpm_error( "Cannot solve a problem with fewer measurements than unknowns",
00523 __FILE__, __LINE__ );
00524 exit(1);
00525 }
00526
00527 if(!jacf){
00528 bpm_error( "No function specified for computing the jacobian in",
00529 __FILE__, __LINE__ );
00530 exit(1);
00531 }
00532
00533 if(opts){
00534 tau=opts[0];
00535 eps1=opts[1];
00536 eps2=opts[2];
00537 eps2_sq=opts[2]*opts[2];
00538 eps3=opts[3];
00539 }
00540 else{
00541 tau=CNST(LM_INIT_MU);
00542 eps1=CNST(LM_STOP_THRESH);
00543 eps2=CNST(LM_STOP_THRESH);
00544 eps2_sq=CNST(LM_STOP_THRESH)*CNST(LM_STOP_THRESH);
00545 eps3=CNST(LM_STOP_THRESH);
00546 }
00547
00548 if(!work){
00549 worksz=LM_DER_WORKSZ(m, n);
00550 work=(double*)malloc(worksz*sizeof(double));
00551 if(!work){
00552 bpm_error( "memory allocation request failed in nr_lmder(...)",
00553 __FILE__, __LINE__ );
00554 exit(1);
00555 }
00556 freework=1;
00557 }
00558
00559
00560 e=work;
00561 hx=e + n;
00562 jacTe=hx + n;
00563 jac=jacTe + m;
00564 jacTjac=jac + nm;
00565 Dp=jacTjac + m*m;
00566 diag_jacTjac=Dp + m;
00567 pDp=diag_jacTjac + m;
00568
00569
00570 (*func)(p, hx, m, n, adata); nfev=1;
00571 for(i=0, p_eL2=0.0; i<n; ++i){
00572 e[i]=tmp=x[i]-hx[i];
00573 p_eL2+=tmp*tmp;
00574 }
00575 init_p_eL2=p_eL2;
00576
00577 for(k=stop=0; k<itmax && !stop; ++k){
00578
00579
00580 if(p_eL2<=eps3){
00581 stop=6;
00582 break;
00583 }
00584
00585
00586
00587
00588
00589
00590 (*jacf)(p, jac, m, n, adata); ++njev;
00591
00592
00593 if(nm<__LM_BLOCKSZ__SQ){
00594
00595
00596
00597
00598
00599
00600
00601
00602
00603
00604
00605
00606
00607
00608 for(i=0; i<m; ++i){
00609 for(j=i; j<m; ++j){
00610 int lm;
00611
00612 for(l=0, tmp=0.0; l<n; ++l){
00613 lm=l*m;
00614 tmp+=jac[lm+i]*jac[lm+j];
00615 }
00616
00617
00618 jacTjac[i*m+j]=jacTjac[j*m+i]=tmp;
00619 }
00620
00621
00622 for(l=0, tmp=0.0; l<n; ++l)
00623 tmp+=jac[l*m+i]*e[l];
00624 jacTe[i]=tmp;
00625 }
00626 }
00627 else{
00628
00629
00630 nr_trans_mat_mat_mult(jac, jacTjac, n, m);
00631
00632
00633 for(i=0; i<m; ++i)
00634 jacTe[i]=0.0;
00635
00636 for(i=0; i<n; ++i){
00637 register double *jacrow;
00638
00639 for(l=0, jacrow=jac+i*m, tmp=e[i]; l<m; ++l)
00640 jacTe[l]+=jacrow[l]*tmp;
00641 }
00642 }
00643
00644
00645 for(i=0, p_L2=jacTe_inf=0.0; i<m; ++i){
00646 if(jacTe_inf < (tmp=FABS(jacTe[i]))) jacTe_inf=tmp;
00647
00648 diag_jacTjac[i]=jacTjac[i*m+i];
00649 p_L2+=p[i]*p[i];
00650 }
00651
00652
00653 #if 0
00654 if(!(k%100)){
00655 printf("Current estimate: ");
00656 for(i=0; i<m; ++i)
00657 printf("%.9g ", p[i]);
00658 printf("-- errors %.9g %0.9g\n", jacTe_inf, p_eL2);
00659 }
00660 #endif
00661
00662
00663 if((jacTe_inf <= eps1)){
00664 Dp_L2=0.0;
00665 stop=1;
00666 break;
00667 }
00668
00669
00670 if(k==0){
00671 for(i=0, tmp=DBL_MIN; i<m; ++i)
00672 if(diag_jacTjac[i]>tmp) tmp=diag_jacTjac[i];
00673 mu=tau*tmp;
00674 }
00675
00676
00677 while(1){
00678
00679 for(i=0; i<m; ++i)
00680 jacTjac[i*m+i]+=mu;
00681
00682
00683
00684 issolved = nr_ax_eq_b_LU(jacTjac, jacTe, Dp, m);
00685
00686
00687 if(issolved){
00688
00689 for(i=0, Dp_L2=0.0; i<m; ++i){
00690 pDp[i]=p[i] + (tmp=Dp[i]);
00691 Dp_L2+=tmp*tmp;
00692 }
00693
00694
00695 if(Dp_L2<=eps2_sq*p_L2){
00696
00697 stop=2;
00698 break;
00699 }
00700
00701 if(Dp_L2>=(p_L2+eps2)/(CNST(LM_EPSILON)*CNST(LM_EPSILON))){
00702
00703 stop=4;
00704 break;
00705 }
00706
00707 (*func)(pDp, hx, m, n, adata); ++nfev;
00708 for(i=0, pDp_eL2=0.0; i<n; ++i){
00709 hx[i]=tmp=x[i]-hx[i];
00710 pDp_eL2+=tmp*tmp;
00711 }
00712
00713 for(i=0, dL=0.0; i<m; ++i)
00714 dL+=Dp[i]*(mu*Dp[i]+jacTe[i]);
00715
00716 dF=p_eL2-pDp_eL2;
00717
00718 if(dL>0.0 && dF>0.0){
00719 tmp=(CNST(2.0)*dF/dL-CNST(1.0));
00720 tmp=CNST(1.0)-tmp*tmp*tmp;
00721 mu=mu*( (tmp>=CNST(LM_ONE_THIRD))? tmp : CNST(LM_ONE_THIRD) );
00722 nu=2;
00723
00724 for(i=0 ; i<m; ++i)
00725 p[i]=pDp[i];
00726
00727 for(i=0; i<n; ++i)
00728 e[i]=hx[i];
00729 p_eL2=pDp_eL2;
00730 break;
00731 }
00732 }
00733
00734
00735
00736
00737
00738 mu*=nu;
00739 nu2=nu<<1;
00740 if(nu2<=nu){
00741 stop=5;
00742 break;
00743 }
00744 nu=nu2;
00745
00746 for(i=0; i<m; ++i)
00747 jacTjac[i*m+i]=diag_jacTjac[i];
00748 }
00749 }
00750
00751 if(k>=itmax) stop=3;
00752
00753 for(i=0; i<m; ++i)
00754 jacTjac[i*m+i]=diag_jacTjac[i];
00755
00756 if(info){
00757 info[0]=init_p_eL2;
00758 info[1]=p_eL2;
00759 info[2]=jacTe_inf;
00760 info[3]=Dp_L2;
00761 for(i=0, tmp=DBL_MIN; i<m; ++i)
00762 if(tmp<jacTjac[i*m+i]) tmp=jacTjac[i*m+i];
00763 info[4]=mu/tmp;
00764 info[5]=(double)k;
00765 info[6]=(double)stop;
00766 info[7]=(double)nfev;
00767 info[8]=(double)njev;
00768 }
00769
00770
00771 if(covar){
00772 nr_lmcovar(jacTjac, covar, p_eL2, m, n);
00773 }
00774
00775 if(freework) free(work);
00776
00777 return (stop!=4)? k : -1;
00778 }
00779
00780
00781
00782
00783
00784 int nr_lmdif(
00785 void (*func)(double *p, double *hx, int m, int n, void *adata),
00786
00787 double *p,
00788 double *x,
00789 int m,
00790 int n,
00791 int itmax,
00792 double opts[5],
00793
00794
00795
00796
00797
00798 double info[LM_INFO_SZ],
00799
00800
00801
00802
00803
00804
00805
00806
00807
00808
00809
00810
00811
00812 double *work,
00813 double *covar,
00814 void *adata )
00815
00816
00817 {
00818 register int i, j, k, l;
00819 int worksz, freework=0, issolved;
00820
00821 double *e,
00822 *hx,
00823 *jacTe,
00824 *jac,
00825 *jacTjac,
00826 *Dp,
00827 *diag_jacTjac,
00828 *pDp,
00829 *wrk;
00830
00831 int using_ffdif=1;
00832 double *wrk2=NULL;
00833
00834 register double mu,
00835 tmp;
00836 double p_eL2, jacTe_inf, pDp_eL2;
00837 double p_L2, Dp_L2=DBL_MAX, dF, dL;
00838 double tau, eps1, eps2, eps2_sq, eps3, delta;
00839 double init_p_eL2;
00840 int nu, nu2, stop, nfev, njap=0, K=(m>=10)? m: 10, updjac, updp=1, newjac;
00841 const int nm=n*m;
00842
00843 mu=jacTe_inf=p_L2=0.0;
00844 stop=updjac=newjac=0;
00845
00846 if(n<m){
00847 bpm_error( "Cannot solve a problem with fewer measurements than unknowns",
00848 __FILE__, __LINE__ );
00849 exit(1);
00850 }
00851
00852 if(opts){
00853 tau=opts[0];
00854 eps1=opts[1];
00855 eps2=opts[2];
00856 eps2_sq=opts[2]*opts[2];
00857 eps3=opts[3];
00858 delta=opts[4];
00859 if(delta<0.0){
00860 delta=-delta;
00861 using_ffdif=0;
00862 wrk2=(double *)malloc(n*sizeof(double));
00863 if(!wrk2){
00864 bpm_error( "memory allocation request failed in nr_lmdif(...)",
00865 __FILE__, __LINE__ );
00866 exit(1);
00867 }
00868 }
00869 }
00870 else{
00871 tau=CNST(LM_INIT_MU);
00872 eps1=CNST(LM_STOP_THRESH);
00873 eps2=CNST(LM_STOP_THRESH);
00874 eps2_sq=CNST(LM_STOP_THRESH)*CNST(LM_STOP_THRESH);
00875 eps3=CNST(LM_STOP_THRESH);
00876 delta=CNST(LM_DIFF_DELTA);
00877 }
00878
00879 if(!work){
00880 worksz=LM_DIF_WORKSZ(m, n);
00881 work=(double *)malloc(worksz*sizeof(double));
00882 if(!work){
00883 bpm_error( "memory allocation request failed in nr_lmdif(...)",
00884 __FILE__, __LINE__ );
00885 exit(1);
00886 }
00887 freework=1;
00888 }
00889
00890
00891 e=work;
00892 hx=e + n;
00893 jacTe=hx + n;
00894 jac=jacTe + m;
00895 jacTjac=jac + nm;
00896 Dp=jacTjac + m*m;
00897 diag_jacTjac=Dp + m;
00898 pDp=diag_jacTjac + m;
00899 wrk=pDp + m;
00900
00901
00902 (*func)(p, hx, m, n, adata); nfev=1;
00903 for(i=0, p_eL2=0.0; i<n; ++i){
00904 e[i]=tmp=x[i]-hx[i];
00905 p_eL2+=tmp*tmp;
00906 }
00907 init_p_eL2=p_eL2;
00908
00909 nu=20;
00910
00911 for(k=0; k<itmax; ++k){
00912
00913
00914
00915
00916 if(p_eL2<=eps3){
00917 stop=6;
00918 break;
00919 }
00920
00921
00922
00923
00924
00925 if((updp && nu>16) || updjac==K){
00926 if(using_ffdif){
00927 nr_fdif_forw_jac_approx(func, p, hx, wrk, delta, jac, m, n, adata);
00928 ++njap; nfev+=m;
00929 }
00930 else{
00931 nr_fdif_cent_jac_approx(func, p, wrk, wrk2, delta, jac, m, n, adata);
00932 ++njap; nfev+=2*m;
00933 }
00934 nu=2; updjac=0; updp=0; newjac=1;
00935 }
00936
00937 if(newjac){
00938 newjac=0;
00939
00940
00941 if(nm<=__LM_BLOCKSZ__SQ){
00942
00943
00944
00945
00946
00947
00948
00949
00950
00951
00952
00953
00955
00956 for(i=0; i<m; ++i){
00957 for(j=i; j<m; ++j){
00958 int lm;
00959
00960 for(l=0, tmp=0.0; l<n; ++l){
00961 lm=l*m;
00962 tmp+=jac[lm+i]*jac[lm+j];
00963 }
00964
00965 jacTjac[i*m+j]=jacTjac[j*m+i]=tmp;
00966 }
00967
00968
00969 for(l=0, tmp=0.0; l<n; ++l)
00970 tmp+=jac[l*m+i]*e[l];
00971 jacTe[i]=tmp;
00972 }
00973 }
00974 else{
00975
00976
00977
00978 nr_trans_mat_mat_mult(jac, jacTjac, n, m);
00979
00980
00981 for(i=0; i<m; ++i)
00982 jacTe[i]=0.0;
00983
00984 for(i=0; i<n; ++i){
00985 register double *jacrow;
00986
00987 for(l=0, jacrow=jac+i*m, tmp=e[i]; l<m; ++l)
00988 jacTe[l]+=jacrow[l]*tmp;
00989 }
00990 }
00991
00992
00993 for(i=0, p_L2=jacTe_inf=0.0; i<m; ++i){
00994 if ( jacTe_inf < ( tmp=FABS(jacTe[i]) ) ) jacTe_inf=tmp;
00995
00996 diag_jacTjac[i]=jacTjac[i*m+i];
00997 p_L2+=p[i]*p[i];
00998 }
00999
01000 }
01001
01002
01003 #if 0
01004 if(!(k%100)){
01005 printf("Current estimate: ");
01006 for(i=0; i<m; ++i)
01007 printf("%.9g (%.9g) ", p[i], jacTe[i]);
01008 printf("-- errors %.9g %0.9g \n", jacTe_inf, p_eL2 );
01009 }
01010 #endif
01011
01012
01013 if((jacTe_inf <= eps1)){
01014 Dp_L2=0.0;
01015 stop=1;
01016 break;
01017 }
01018
01019
01020 if(k==0){
01021 for(i=0, tmp=DBL_MIN; i<m; ++i)
01022 if(diag_jacTjac[i]>tmp) tmp=diag_jacTjac[i];
01023 mu=tau*tmp;
01024 }
01025
01026
01027
01028
01029 for(i=0; i<m; ++i)
01030 jacTjac[i*m+i]+=mu;
01031
01032
01033 issolved=nr_ax_eq_b_LU(jacTjac, jacTe, Dp, m);
01034
01035
01036 if(issolved){
01037
01038 for(i=0, Dp_L2=0.0; i<m; ++i){
01039 pDp[i]=p[i] + (tmp=Dp[i]);
01040 Dp_L2+=tmp*tmp;
01041 }
01042
01043
01044 if(Dp_L2<=eps2_sq*p_L2){
01045
01046 stop=2;
01047 break;
01048 }
01049
01050 if(Dp_L2>=(p_L2+eps2)/(CNST(LM_EPSILON)*CNST(LM_EPSILON))){
01051
01052 stop=4;
01053 break;
01054 }
01055
01056 (*func)(pDp, wrk, m, n, adata); ++nfev;
01057 for(i=0, pDp_eL2=0.0; i<n; ++i){
01058 tmp=x[i]-wrk[i];
01059 pDp_eL2+=tmp*tmp;
01060 }
01061
01062 dF=p_eL2-pDp_eL2;
01063 if(updp || dF>0){
01064 for(i=0; i<n; ++i){
01065 for(l=0, tmp=0.0; l<m; ++l)
01066 tmp+=jac[i*m+l]*Dp[l];
01067 tmp=(wrk[i] - hx[i] - tmp)/Dp_L2;
01068 for(j=0; j<m; ++j)
01069 jac[i*m+j]+=tmp*Dp[j];
01070 }
01071 ++updjac;
01072 newjac=1;
01073 }
01074
01075 for(i=0, dL=0.0; i<m; ++i)
01076 dL+=Dp[i]*(mu*Dp[i]+jacTe[i]);
01077
01078 if(dL>0.0 && dF>0.0){
01079 dF=(CNST(2.0)*dF/dL-CNST(1.0));
01080 tmp=dF*dF*dF;
01081 tmp=CNST(1.0)-tmp*tmp*dF;
01082 mu=mu*( (tmp>=CNST(LM_ONE_THIRD))? tmp : CNST(LM_ONE_THIRD) );
01083 nu=2;
01084
01085 for(i=0 ; i<m; ++i)
01086 p[i]=pDp[i];
01087
01088 for(i=0; i<n; ++i){
01089 e[i]=x[i]-wrk[i];
01090 hx[i]=wrk[i];
01091 }
01092 p_eL2=pDp_eL2;
01093 updp=1;
01094 continue;
01095 }
01096 }
01097
01098
01099
01100
01101
01102 mu*=nu;
01103 nu2=nu<<1;
01104 if(nu2<=nu){
01105 stop=5;
01106 break;
01107 }
01108 nu=nu2;
01109
01110 for(i=0; i<m; ++i)
01111 jacTjac[i*m+i]=diag_jacTjac[i];
01112
01113 }
01114
01115 if(k>=itmax) stop=3;
01116
01117 for(i=0; i<m; ++i)
01118 jacTjac[i*m+i]=diag_jacTjac[i];
01119
01120 if(info){
01121 info[0]=init_p_eL2;
01122 info[1]=p_eL2;
01123 info[2]=jacTe_inf;
01124 info[3]=Dp_L2;
01125 for(i=0, tmp=DBL_MIN; i<m; ++i)
01126 if(tmp<jacTjac[i*m+i]) tmp=jacTjac[i*m+i];
01127 info[4]=mu/tmp;
01128 info[5]=(double)k;
01129 info[6]=(double)stop;
01130 info[7]=(double)nfev;
01131 info[8]=(double)njap;
01132 }
01133
01134
01135 if(covar){
01136 nr_lmcovar(jacTjac, covar, p_eL2, m, n);
01137 }
01138
01139
01140 if ( freework && ( work != NULL )) free(work);
01141
01142 if ( wrk2 ) free(wrk2);
01143
01144 return (stop!=4)? k : -1;
01145 }
01146
01147
01148
01149
01150
01151
01152
01153
01154
01155
01156
01157
01158
01159
01160
01161
01162
01163
01164 int nr_ax_eq_b_LU(double *A, double *B, double *x, int m ) {
01165 __LM_STATIC__ void *buf=NULL;
01166 __LM_STATIC__ int buf_sz=0;
01167
01168 register int i, j, k;
01169 int *idx, maxi=-1, idx_sz, a_sz, work_sz, tot_sz;
01170 double *a, *work, max, sum, tmp;
01171
01172 #ifdef LINSOLVERS_RETAIN_MEMORY
01173 if(!A){
01174 if(buf) free(buf);
01175 buf_sz=0;
01176 return 1;
01177 }
01178 #endif
01179
01180
01181 idx_sz=m;
01182 a_sz=m*m;
01183 work_sz=m;
01184 tot_sz=idx_sz*sizeof(int) + (a_sz+work_sz)*sizeof(double);
01185
01186 #ifdef LINSOLVERS_RETAIN_MEMORY
01187 if(tot_sz>buf_sz){
01188 if(buf) free(buf);
01189
01190 buf_sz=tot_sz;
01191 buf=(void *)malloc(tot_sz);
01192 if(!buf){
01193 bpm_error( "memory allocation request failed in nr_ax_eq_b_LU(...)",
01194 __FILE__, __LINE__ );
01195 exit(1);
01196 }
01197 }
01198 #else
01199 buf_sz=tot_sz;
01200 buf=(void *)malloc(tot_sz);
01201 if(!buf){
01202 bpm_error( "memory allocation request failed in nr_ax_eq_b_LU(...)",
01203 __FILE__, __LINE__ );
01204 exit(1);
01205 }
01206 #endif
01207
01208 idx=(int *)buf;
01209 a=(double *)(idx + idx_sz);
01210 work=a + a_sz;
01211
01212
01213 for(i=0; i<m; ++i){
01214 a[i]=A[i];
01215 x[i]=B[i];
01216 }
01217 for( ; i<a_sz; ++i) a[i]=A[i];
01218
01219
01220
01221
01222
01223
01224
01225
01226
01227 for(i=0; i<m; ++i){
01228 max=0.0;
01229 for(j=0; j<m; ++j)
01230 if((tmp=FABS(a[i*m+j]))>max)
01231 max=tmp;
01232 if(max==0.0){
01233 bpm_error( "Singular matrix A in nr_ax_eq_b_LU(...)", __FILE__, __LINE__ );
01234
01235 #ifndef LINSOLVERS_RETAIN_MEMORY
01236 free(buf);
01237 #endif
01238
01239 return 0;
01240 }
01241 work[i]=CNST(1.0)/max;
01242 }
01243
01244 for(j=0; j<m; ++j){
01245 for(i=0; i<j; ++i){
01246 sum=a[i*m+j];
01247 for(k=0; k<i; ++k)
01248 sum-=a[i*m+k]*a[k*m+j];
01249 a[i*m+j]=sum;
01250 }
01251 max=0.0;
01252 for(i=j; i<m; ++i){
01253 sum=a[i*m+j];
01254 for(k=0; k<j; ++k)
01255 sum-=a[i*m+k]*a[k*m+j];
01256 a[i*m+j]=sum;
01257 if((tmp=work[i]*FABS(sum))>=max){
01258 max=tmp;
01259 maxi=i;
01260 }
01261 }
01262 if(j!=maxi){
01263 for(k=0; k<m; ++k){
01264 tmp=a[maxi*m+k];
01265 a[maxi*m+k]=a[j*m+k];
01266 a[j*m+k]=tmp;
01267 }
01268 work[maxi]=work[j];
01269 }
01270 idx[j]=maxi;
01271 if(a[j*m+j]==0.0)
01272 a[j*m+j]=DBL_EPSILON;
01273 if(j!=m-1){
01274 tmp=CNST(1.0)/(a[j*m+j]);
01275 for(i=j+1; i<m; ++i)
01276 a[i*m+j]*=tmp;
01277 }
01278 }
01279
01280
01281
01282
01283 for(i=k=0; i<m; ++i){
01284 j=idx[i];
01285 sum=x[j];
01286 x[j]=x[i];
01287 if(k!=0)
01288 for(j=k-1; j<i; ++j)
01289 sum-=a[i*m+j]*x[j];
01290 else
01291 if(sum!=0.0)
01292 k=i+1;
01293 x[i]=sum;
01294 }
01295
01296 for(i=m-1; i>=0; --i){
01297 sum=x[i];
01298 for(j=i+1; j<m; ++j)
01299 sum-=a[i*m+j]*x[j];
01300 x[i]=sum/a[i*m+i];
01301 }
01302
01303 #ifndef LINSOLVERS_RETAIN_MEMORY
01304 free(buf);
01305 #endif
01306
01307 return 1;
01308 }
01309
01310
01311
01312
01313 static void
01314 lm_lnsrch(int m, double *x, double f, double *g, double *p, double alpha, double *xpls,
01315 double *ffpls, void (*func)(double *p, double *hx, int m, int n, void *adata), struct lm_fstate state,
01316 int *mxtake, int *iretcd, double stepmx, double steptl, double *sx)
01317 {
01318
01319
01320
01321
01322
01323
01324
01325
01326
01327
01328
01329
01330
01331
01332
01333
01334
01335
01336
01337
01338
01339
01340
01341
01342
01343
01344
01345
01346
01347
01348
01349
01350 register int i;
01351 int firstback = 1;
01352 double disc;
01353 double a3, b;
01354 double t1, t2, t3, lambda, tlmbda, rmnlmb;
01355 double scl, rln, sln, slp;
01356 double tmp1, tmp2;
01357 double fpls, pfpls = 0., plmbda = 0.;
01358
01359 f*=CNST(0.5);
01360 *mxtake = 0;
01361 *iretcd = 2;
01362 tmp1 = 0.;
01363 if(!sx)
01364 for (i = 0; i < m; ++i)
01365 tmp1 += p[i] * p[i];
01366 else
01367 for (i = 0; i < m; ++i)
01368 tmp1 += sx[i] * sx[i] * p[i] * p[i];
01369 sln = (double)sqrt(tmp1);
01370 if (sln > stepmx) {
01371
01372 scl = stepmx / sln;
01373 for(i=0; i<m; ++i)
01374 p[i]*=scl;
01375 sln = stepmx;
01376 }
01377 for(i=0, slp=0.; i<m; ++i)
01378 slp+=g[i]*p[i];
01379 rln = 0.;
01380 if(!sx)
01381 for (i = 0; i < m; ++i) {
01382 tmp1 = (FABS(x[i])>=CNST(1.))? FABS(x[i]) : CNST(1.);
01383 tmp2 = FABS(p[i])/tmp1;
01384 if(rln < tmp2) rln = tmp2;
01385 }
01386 else
01387 for (i = 0; i < m; ++i) {
01388 tmp1 = (FABS(x[i])>=CNST(1.)/sx[i])? FABS(x[i]) : CNST(1.)/sx[i];
01389 tmp2 = FABS(p[i])/tmp1;
01390 if(rln < tmp2) rln = tmp2;
01391 }
01392 rmnlmb = steptl / rln;
01393 lambda = CNST(1.0);
01394
01395
01396
01397 while(*iretcd > 1) {
01398 for (i = 0; i < m; ++i)
01399 xpls[i] = x[i] + lambda * p[i];
01400
01401
01402 (*func)(xpls, state.hx, m, state.n, state.adata);
01403 for(i=0, tmp1=0.0; i<state.n; ++i){
01404 state.hx[i]=tmp2=state.x[i]-state.hx[i];
01405 tmp1+=tmp2*tmp2;
01406 }
01407 fpls=CNST(0.5)*tmp1; *ffpls=tmp1; ++(*(state.nfev));
01408
01409 if (fpls <= f + slp * alpha * lambda) {
01410 *iretcd = 0;
01411 if (lambda == CNST(1.) && sln > stepmx * CNST(.99)) *mxtake = 1;
01412 return;
01413 }
01414
01415
01416
01417
01418
01419 if (lambda < rmnlmb) {
01420
01421
01422 *iretcd = 1;
01423 return;
01424 }
01425 else {
01426
01427
01428 if (fpls >= DBL_MAX) {
01429 lambda *= CNST(0.1);
01430 firstback = 1;
01431 }
01432 else {
01433 if (firstback) {
01434 tlmbda = -lambda * slp / ((fpls - f - slp) * CNST(2.));
01435 firstback = 0;
01436 }
01437 else {
01438 t1 = fpls - f - lambda * slp;
01439 t2 = pfpls - f - plmbda * slp;
01440 t3 = CNST(1.) / (lambda - plmbda);
01441 a3 = CNST(3.) * t3 * (t1 / (lambda * lambda)
01442 - t2 / (plmbda * plmbda));
01443 b = t3 * (t2 * lambda / (plmbda * plmbda)
01444 - t1 * plmbda / (lambda * lambda));
01445 disc = b * b - a3 * slp;
01446 if (disc > b * b)
01447
01448 tlmbda = (-b + ((a3 < 0)? -(double)sqrt(disc): (double)sqrt(disc))) /a3;
01449 else
01450
01451 tlmbda = (-b + ((a3 < 0)? (double)sqrt(disc): -(double)sqrt(disc))) /a3;
01452
01453 if (tlmbda > lambda * CNST(.5))
01454 tlmbda = lambda * CNST(.5);
01455 }
01456 plmbda = lambda;
01457 pfpls = fpls;
01458 if (tlmbda < lambda * CNST(.1))
01459 lambda *= CNST(.1);
01460 else
01461 lambda = tlmbda;
01462 }
01463 }
01464 }
01465 }
01466
01467
01468
01469
01470
01471
01472 static void boxProject(double *p, double *lb, double *ub, int m) {
01473 register int i;
01474
01475 if(!lb){
01476 if(!ub)
01477 return;
01478 else{
01479 for(i=0; i<m; ++i)
01480 if(p[i]>ub[i]) p[i]=ub[i];
01481 }
01482 }
01483 else
01484 if(!ub){
01485 for(i=0; i<m; ++i)
01486 if(p[i]<lb[i]) p[i]=lb[i];
01487 }
01488 else
01489 for(i=0; i<m; ++i)
01490 p[i]=__LM_MEDIAN3(lb[i], p[i], ub[i]);
01491
01492 return;
01493 }
01494
01495
01496 static int boxCheck(double *lb, double *ub, int m) {
01497 register int i;
01498
01499 if(!lb || !ub) return 1;
01500
01501 for(i=0; i<m; ++i)
01502 if(lb[i]>ub[i]) return 0;
01503
01504 return 1;
01505 }
01506
01507
01508
01509
01510
01511
01512
01513
01514
01515
01516
01517
01518
01519
01520
01521
01522
01523
01524
01525
01526
01527
01528 int nr_lmder_bc(
01529 void (*func)(double *p, double *hx, int m, int n, void *adata),
01530 void (*jacf)(double *p, double *j, int m, int n, void *adata),
01531 double *p,
01532 double *x,
01533 int m,
01534 int n,
01535 double *lb,
01536 double *ub,
01537 int itmax,
01538 double opts[4],
01539
01540
01541
01542 double info[LM_INFO_SZ],
01543
01544
01545
01546
01547
01548
01549
01550
01551
01552
01553
01554
01555
01556 double *work,
01557 double *covar,
01558 void *adata)
01559
01560
01561 {
01562 register int i, j, k, l;
01563 int worksz, freework=0, issolved;
01564
01565 double *e,
01566 *hx,
01567 *jacTe,
01568 *jac,
01569 *jacTjac,
01570 *Dp,
01571 *diag_jacTjac,
01572 *pDp;
01573
01574 register double mu,
01575 tmp;
01576 double p_eL2, jacTe_inf, pDp_eL2;
01577 double p_L2, Dp_L2=DBL_MAX, dF, dL;
01578 double tau, eps1, eps2, eps2_sq, eps3;
01579 double init_p_eL2;
01580 int nu=2, nu2, stop, nfev, njev=0;
01581 const int nm=n*m;
01582
01583
01584 struct lm_fstate fstate;
01585 double alpha=CNST(1e-4), beta=CNST(0.9), gamma=CNST(0.99995), gamma_sq=gamma*gamma, rho=CNST(1e-8);
01586 double t, t0;
01587 double steptl=CNST(1e3)*(double)sqrt(DBL_EPSILON), jacTeDp;
01588 double tmin=CNST(1e-12), tming=CNST(1e-18);
01589 const double tini=CNST(1.0);
01590 int nLMsteps=0, nLSsteps=0, nPGsteps=0, gprevtaken=0;
01591 int numactive;
01592
01593 mu=jacTe_inf=t=0.0; tmin=tmin;
01594
01595 if(n<m){
01596 bpm_error( "Cannot solve a problem with fewer measurements than unknowns",
01597 __FILE__, __LINE__ );
01598 exit(1);
01599 }
01600
01601 if(!jacf){
01602 bpm_error( "No function specified for computing the jacobian in",
01603 __FILE__, __LINE__ );
01604 exit(1);
01605 }
01606
01607 if(!boxCheck(lb, ub, m)){
01608 bpm_error( "At least one lower bound exceeds the upper one",
01609 __FILE__, __LINE__ );
01610 exit(1);
01611 }
01612
01613 if(opts){
01614 tau=opts[0];
01615 eps1=opts[1];
01616 eps2=opts[2];
01617 eps2_sq=opts[2]*opts[2];
01618 eps3=opts[3];
01619 }
01620 else{
01621 tau=CNST(LM_INIT_MU);
01622 eps1=CNST(LM_STOP_THRESH);
01623 eps2=CNST(LM_STOP_THRESH);
01624 eps2_sq=CNST(LM_STOP_THRESH)*CNST(LM_STOP_THRESH);
01625 eps3=CNST(LM_STOP_THRESH);
01626 }
01627
01628 if(!work){
01629 worksz=LM_DER_WORKSZ(m, n);
01630 work=(double *)malloc(worksz*sizeof(double));
01631 if(!work){
01632 bpm_error( "Memory allocation request failed", __FILE__, __LINE__ );
01633 exit(1);
01634 }
01635 freework=1;
01636 }
01637
01638
01639 e=work;
01640 hx=e + n;
01641 jacTe=hx + n;
01642 jac=jacTe + m;
01643 jacTjac=jac + nm;
01644 Dp=jacTjac + m*m;
01645 diag_jacTjac=Dp + m;
01646 pDp=diag_jacTjac + m;
01647
01648 fstate.n=n;
01649 fstate.hx=hx;
01650 fstate.x=x;
01651 fstate.adata=adata;
01652 fstate.nfev=&nfev;
01653
01654
01655 for(i=0; i<m; ++i)
01656 pDp[i]=p[i];
01657 boxProject(p, lb, ub, m);
01658 for(i=0; i<m; ++i)
01659 if(pDp[i]!=p[i])
01660 fprintf(stderr, "Warning: component %d of starting point not feasible in nr_lmder_bc! [%g projected to %g]\n", i, p[i], pDp[i]);
01661
01662
01663 (*func)(p, hx, m, n, adata); nfev=1;
01664 for(i=0, p_eL2=0.0; i<n; ++i){
01665 e[i]=tmp=x[i]-hx[i];
01666 p_eL2+=tmp*tmp;
01667 }
01668 init_p_eL2=p_eL2;
01669
01670 for(k=stop=0; k<itmax && !stop; ++k){
01671
01672
01673
01674 if(p_eL2<=eps3){
01675 stop=6;
01676 break;
01677 }
01678
01679
01680
01681
01682
01683
01684 (*jacf)(p, jac, m, n, adata); ++njev;
01685
01686
01687 if(nm<__LM_BLOCKSZ__SQ){
01688
01689
01690
01691
01692
01693
01694
01695
01696
01697
01698
01699
01700
01701
01702 for(i=0; i<m; ++i){
01703 for(j=i; j<m; ++j){
01704 int lm;
01705
01706 for(l=0, tmp=0.0; l<n; ++l){
01707 lm=l*m;
01708 tmp+=jac[lm+i]*jac[lm+j];
01709 }
01710
01711
01712 jacTjac[i*m+j]=jacTjac[j*m+i]=tmp;
01713 }
01714
01715
01716 for(l=0, tmp=0.0; l<n; ++l)
01717 tmp+=jac[l*m+i]*e[l];
01718 jacTe[i]=tmp;
01719 }
01720 }
01721 else{
01722
01723
01724 nr_trans_mat_mat_mult(jac, jacTjac, n, m);
01725
01726
01727 for(i=0; i<m; ++i)
01728 jacTe[i]=0.0;
01729
01730 for(i=0; i<n; ++i){
01731 register double *jacrow;
01732
01733 for(l=0, jacrow=jac+i*m, tmp=e[i]; l<m; ++l)
01734 jacTe[l]+=jacrow[l]*tmp;
01735 }
01736 }
01737
01738
01739
01740
01741
01742
01743 for(i=j=numactive=0, p_L2=jacTe_inf=0.0; i<m; ++i){
01744 if(ub && p[i]==ub[i]){ ++numactive; if(jacTe[i]>0.0) ++j; }
01745 else if(lb && p[i]==lb[i]){ ++numactive; if(jacTe[i]<0.0) ++j; }
01746 else if(jacTe_inf < (tmp=FABS(jacTe[i]))) jacTe_inf=tmp;
01747
01748 diag_jacTjac[i]=jacTjac[i*m+i];
01749 p_L2+=p[i]*p[i];
01750 }
01751
01752
01753 #if 0
01754 if(!(k%100)){
01755 printf("Current estimate: ");
01756 for(i=0; i<m; ++i)
01757 printf("%.9g ", p[i]);
01758 printf("-- errors %.9g %0.9g, #active %d [%d]\n", jacTe_inf, p_eL2, numactive, j);
01759 }
01760 #endif
01761
01762
01763 if(j==numactive && (jacTe_inf <= eps1)){
01764 Dp_L2=0.0;
01765 stop=1;
01766 break;
01767 }
01768
01769
01770 if(k==0){
01771 if(!lb && !ub){
01772 for(i=0, tmp=DBL_MIN; i<m; ++i)
01773 if(diag_jacTjac[i]>tmp) tmp=diag_jacTjac[i];
01774 mu=tau*tmp;
01775 }
01776 else
01777 mu=CNST(0.5)*tau*p_eL2;
01778 }
01779
01780
01781 while(1){
01782
01783 for(i=0; i<m; ++i)
01784 jacTjac[i*m+i]+=mu;
01785
01786
01787 issolved = nr_ax_eq_b_LU(jacTjac, jacTe, Dp, m);
01788
01789 if(issolved){
01790 for(i=0; i<m; ++i)
01791 pDp[i]=p[i] + Dp[i];
01792
01793
01794 boxProject(pDp, lb, ub, m);
01795 for(i=0, Dp_L2=0.0; i<m; ++i){
01796 Dp[i]=tmp=pDp[i]-p[i];
01797 Dp_L2+=tmp*tmp;
01798 }
01799
01800
01801 if(Dp_L2<=eps2_sq*p_L2){
01802 stop=2;
01803 break;
01804 }
01805
01806 if(Dp_L2>=(p_L2+eps2)/(CNST(LM_EPSILON)*CNST(LM_EPSILON))){
01807 stop=4;
01808 break;
01809 }
01810
01811 (*func)(pDp, hx, m, n, adata); ++nfev;
01812 for(i=0, pDp_eL2=0.0; i<n; ++i){
01813 hx[i]=tmp=x[i]-hx[i];
01814 pDp_eL2+=tmp*tmp;
01815 }
01816
01817 if(pDp_eL2<=gamma_sq*p_eL2){
01818 for(i=0, dL=0.0; i<m; ++i)
01819 dL+=Dp[i]*(mu*Dp[i]+jacTe[i]);
01820
01821 #if 1
01822 if(dL>0.0){
01823 dF=p_eL2-pDp_eL2;
01824 tmp=(CNST(2.0)*dF/dL-CNST(1.0));
01825 tmp=CNST(1.0)-tmp*tmp*tmp;
01826 mu=mu*( (tmp>=CNST(LM_ONE_THIRD))? tmp : CNST(LM_ONE_THIRD) );
01827 }
01828 else
01829 mu=(mu>=pDp_eL2)? pDp_eL2 : mu;
01830 #else
01831
01832 mu=(mu>=pDp_eL2)? pDp_eL2 : mu;
01833 #endif
01834
01835 nu=2;
01836
01837 for(i=0 ; i<m; ++i)
01838 p[i]=pDp[i];
01839
01840 for(i=0; i<n; ++i)
01841 e[i]=hx[i];
01842 p_eL2=pDp_eL2;
01843 ++nLMsteps;
01844 gprevtaken=0;
01845 break;
01846 }
01847 }
01848 else{
01849
01850
01851
01852 mu*=nu;
01853 nu2=nu<<1;
01854 if(nu2<=nu){
01855 stop=5;
01856 break;
01857 }
01858 nu=nu2;
01859
01860 for(i=0; i<m; ++i)
01861 jacTjac[i*m+i]=diag_jacTjac[i];
01862
01863 continue;
01864 }
01865
01866
01867
01868
01869
01870
01871 for(i=0, jacTeDp=0.0; i<m; ++i){
01872 jacTe[i]=-jacTe[i];
01873 jacTeDp+=jacTe[i]*Dp[i];
01874 }
01875
01876 if(jacTeDp<=-rho*pow(Dp_L2, _LM_POW_/CNST(2.0))){
01877
01878 int mxtake, iretcd;
01879 double stepmx;
01880
01881 tmp=(double)sqrt(p_L2); stepmx=CNST(1e3)*( (tmp>=CNST(1.0))? tmp : CNST(1.0) );
01882
01883 #if 1
01884
01885 lm_lnsrch(m, p, p_eL2, jacTe, Dp, alpha, pDp, &pDp_eL2, func, fstate,
01886 &mxtake, &iretcd, stepmx, steptl, NULL);
01887 if(iretcd!=0) goto gradproj;
01888 #else
01889
01890 for(t=tini; t>tmin; t*=beta){
01891 for(i=0; i<m; ++i){
01892 pDp[i]=p[i] + t*Dp[i];
01893
01894 }
01895
01896 (*func)(pDp, hx, m, n, adata); ++nfev;
01897 for(i=0, pDp_eL2=0.0; i<n; ++i){
01898 hx[i]=tmp=x[i]-hx[i];
01899 pDp_eL2+=tmp*tmp;
01900 }
01901
01902 if(pDp_eL2<=p_eL2 + CNST(2.0)*t*alpha*jacTeDp) break;
01903 }
01904 #endif
01905 ++nLSsteps;
01906 gprevtaken=0;
01907
01908
01909
01910
01911 }
01912 else{
01913 gradproj:
01914
01915
01916
01917
01918
01919 for(i=0, tmp=0.0; i<m; ++i)
01920 tmp=jacTe[i]*jacTe[i];
01921 tmp=(double)sqrt(tmp);
01922 tmp=CNST(100.0)/(CNST(1.0)+tmp);
01923 t0=(tmp<=tini)? tmp : tini;
01924
01925 for(t=(gprevtaken)? t : t0; t>tming; t*=beta){
01926 for(i=0; i<m; ++i)
01927 pDp[i]=p[i] - t*jacTe[i];
01928 boxProject(pDp, lb, ub, m);
01929 for(i=0; i<m; ++i)
01930 Dp[i]=pDp[i]-p[i];
01931
01932 (*func)(pDp, hx, m, n, adata); ++nfev;
01933 for(i=0, pDp_eL2=0.0; i<n; ++i){
01934 hx[i]=tmp=x[i]-hx[i];
01935 pDp_eL2+=tmp*tmp;
01936 }
01937 for(i=0, tmp=0.0; i<m; ++i)
01938 tmp+=jacTe[i]*Dp[i];
01939
01940 if(gprevtaken && pDp_eL2<=p_eL2 + CNST(2.0)*CNST(0.99999)*tmp){
01941 t=t0;
01942 gprevtaken=0;
01943 continue;
01944 }
01945
01946 if(pDp_eL2<=p_eL2 + CNST(2.0)*alpha*tmp) break;
01947 }
01948
01949 ++nPGsteps;
01950 gprevtaken=1;
01951
01952 }
01953
01954
01955
01956 for(i=0, Dp_L2=0.0; i<m; ++i){
01957 tmp=pDp[i]-p[i];
01958 Dp_L2+=tmp*tmp;
01959 }
01960
01961
01962 if(Dp_L2<=eps2_sq*p_L2){
01963 stop=2;
01964 break;
01965 }
01966
01967 for(i=0 ; i<m; ++i)
01968 p[i]=pDp[i];
01969
01970 for(i=0; i<n; ++i)
01971 e[i]=hx[i];
01972 p_eL2=pDp_eL2;
01973 break;
01974 }
01975 }
01976
01977 if(k>=itmax) stop=3;
01978
01979 for(i=0; i<m; ++i)
01980 jacTjac[i*m+i]=diag_jacTjac[i];
01981
01982 if(info){
01983 info[0]=init_p_eL2;
01984 info[1]=p_eL2;
01985 info[2]=jacTe_inf;
01986 info[3]=Dp_L2;
01987 for(i=0, tmp=DBL_MIN; i<m; ++i)
01988 if(tmp<jacTjac[i*m+i]) tmp=jacTjac[i*m+i];
01989 info[4]=mu/tmp;
01990 info[5]=(double)k;
01991 info[6]=(double)stop;
01992 info[7]=(double)nfev;
01993 info[8]=(double)njev;
01994 }
01995
01996
01997 if(covar){
01998 nr_lmcovar(jacTjac, covar, p_eL2, m, n);
01999 }
02000
02001 if(freework) free(work);
02002
02003 #if 0
02004 printf("%d LM steps, %d line search, %d projected gradient\n", nLMsteps, nLSsteps, nPGsteps);
02005 #endif
02006
02007 return (stop!=4)? k : -1;
02008 }
02009
02010
02011
02012
02013
02014 struct lmbc_dif_data{
02015 void (*func)(double *p, double *hx, int m, int n, void *adata);
02016 double *hx, *hxx;
02017 void *adata;
02018 double delta;
02019 };
02020
02021 void lmbc_dif_func(double *p, double *hx, int m, int n, void *data) {
02022 struct lmbc_dif_data *dta=(struct lmbc_dif_data *)data;
02023
02024
02025 (*(dta->func))(p, hx, m, n, dta->adata);
02026 return;
02027 }
02028
02029 void lmbc_dif_jacf(double *p, double *jac, int m, int n, void *data) {
02030 struct lmbc_dif_data *dta=(struct lmbc_dif_data *)data;
02031
02032
02033 (*(dta->func))(p, dta->hx, m, n, dta->adata);
02034 nr_fdif_forw_jac_approx(dta->func, p, dta->hx, dta->hxx, dta->delta, jac, m, n, dta->adata);
02035 return;
02036 }
02037
02038
02039
02040
02041
02042
02043
02044 int nr_lmdif_bc(
02045 void (*func)(double *p, double *hx, int m, int n, void *adata),
02046 double *p,
02047 double *x,
02048 int m,
02049 int n,
02050 double *lb,
02051 double *ub,
02052 int itmax,
02053 double opts[5],
02054
02055
02056
02057
02058
02059 double info[LM_INFO_SZ],
02060
02061
02062
02063
02064
02065
02066
02067
02068
02069
02070
02071
02072
02073 double *work,
02074 double *covar,
02075 void *adata)
02076
02077
02078 {
02079 struct lmbc_dif_data data;
02080 int ret;
02081
02082
02083
02084 data.func=func;
02085 data.hx=(double *)malloc(2*n*sizeof(double));
02086 if(!data.hx){
02087 bpm_error( "memory allocation request failed in nr_lmdif_bc(...)",
02088 __FILE__, __LINE__ );
02089 exit(1);
02090 }
02091 data.hxx=data.hx+n;
02092 data.adata=adata;
02093 data.delta=(opts)? FABS(opts[4]) : (double)LM_DIFF_DELTA;
02094
02095 ret=nr_lmder_bc( lmbc_dif_func, lmbc_dif_jacf, p, x, m, n, lb, ub,
02096 itmax, opts, info, work, covar, (void *)&data );
02097
02098 if(info)
02099 info[7]+=info[8]*(m+1);
02100
02101 free(data.hx);
02102
02103 return ret;
02104 }