#include #include #include #include #include void main_cal(int,int); void mul_fft_fuku(double *,double *,int); void mul_fft_dataset(short int *,int,double *,int); void mul_fft_dataput_type3(short int *,int,int,double *,int); void mul_fft_dataput_type2(short int *,int,int,double *,int); void lmulmt_herf(short int *,short int *,short int *,int,int,int,double *,int); void mul_harf_hosei(short int *,short int *,short int *,short int *,short int *,int,int,int,int,int); void gyaku_herf(short int *,short int *,int,double *); void gmul_xa(short int *,short int *,int,double *); void gmul_precalc(short int *,short int *,int,double *); void rdft(int, int, double *); void main(void) { main_cal(1024,1); } void main_cal(int bx,int seed) { short int *stsu,*st1,*su1; int i,mr; double *cp; long t_1, t_2; stsu=(short int *)malloc((bx/4+4+2)*sizeof(short int)*2+10); cp=(double *)malloc((bx/4+4)*sizeof(double)*2+(bx/4+2)*sizeof(short int)); if(cp == NULL){ printf("Memory Allocation Failure!\n"); exit(1); } printf("use memory = %d\n",(bx/4+4)*sizeof(short int)*2+2+(bx/4+4)*sizeof(double)*2+(bx/4+2)*sizeof(short int)); for(i=0;i<(bx/4+4)*2;i++) stsu[i]=rand()%10000; srand(seed); mr=bx/4-1; st1=stsu; su1=st1+mr+2+3; *((int *)st1)=mr; for(i=2;i<2+mr+3;i++){ st1[i]=rand()%10000; } if(st1[2]==0) st1[2]=rand()%10000; t_1=clock(); st1[2+mr+0]=0; st1[2+mr+1]=0; st1[2+mr+2]=0; gyaku_herf(st1,su1,mr,cp); t_2=clock(); lmulmt_herf(st1,su1,st1,mr,mr,mr,cp,mr); for(i=0;i0;i--){ b[i]=9999-b[i]; } mul_fft_dataset(b,kt,acp,shr); rdft(shr, 1, acp); mul_fft_fuku(acp,bcp,shr); rdft(shr,-1, acp); mul_fft_dataput_type3(b,kt+kt-1,kt,acp,shr); } void gmul_xa(short int *a,short int *b,int kt,double *acp) { int i,shr,b1,b2,b3,b4,be1,be2,be3,over_chk; double *bcp,und1,und2,und3; shr=(kt+1)*2; bcp=acp+shr+4; b1=b[0]; b2=b[1]; b3=b[2]; b4=b[3]; be1=b[kt+0]; be2=b[kt+1]; be3=b[kt+2]; mul_fft_dataset(a,kt,acp,shr); rdft(shr, 1, acp); mul_fft_dataset(b,kt,bcp,shr); rdft(shr, 1, bcp); mul_fft_fuku(acp,bcp,shr); rdft(shr,-1, acp); mul_fft_dataput_type3(b+kt,kt,kt,acp,shr); mul_fft_dataset(a+kt,kt,acp,shr); rdft(shr, 1, acp); mul_fft_fuku(acp,bcp,shr); rdft(shr,-1, acp); und1=(double)be1*(shr/2.0); und2=(double)be2*(shr/2.0); und3=(double)be3*(shr/2.0); for(i=0;i=kt;i--){ b[i]=9999-b[i]; } } mul_fft_dataset(b+kt+2,kt,acp,shr); rdft(shr, 1, acp); mul_fft_fuku(acp,bcp,shr); rdft(shr,-1, acp); acp[kt+2]+=b[kt+2]*be1*(shr/2.0); acp[kt+3]+=b[kt+2]*be2*(shr/2.0); acp[kt+4]+=b[kt+2]*be3*(shr/2.0); acp[kt+3]+=b[kt+3]*be1*(shr/2.0); acp[kt+4]+=b[kt+3]*be2*(shr/2.0); acp[kt+5]+=b[kt+3]*be3*(shr/2.0); acp[kt+4]+=b[kt+4]*be1*(shr/2.0); acp[kt+5]+=b[kt+4]*be2*(shr/2.0); acp[kt+6]+=b[kt+4]*be3*(shr/2.0); acp[kt+2]+=b[kt*2+2]*b1*(shr/2.0); acp[kt+3]+=b[kt*2+2]*b2*(shr/2.0); acp[kt+4]+=b[kt*2+2]*b3*(shr/2.0); acp[kt+3]+=b[kt*2+3]*b1*(shr/2.0); acp[kt+4]+=b[kt*2+3]*b2*(shr/2.0); acp[kt+5]+=b[kt*2+3]*b3*(shr/2.0); mul_fft_dataput_type3(b+kt+1,kt+kt,kt+3,acp,shr); if(over_chk==1){ for(i=kt*2+3;i>=kt;i--){ b[i]=9999-b[i]; } } b[kt+2]+=be3; b[kt+1]+=be2; b[kt+0]+=be1; if(b[kt+2]>9999){b[kt+2]-=10000; b[kt+1]++;} if(b[kt+1]>9999){b[kt+1]-=10000; b[kt+0]++;} if(b[kt+0]>9999){b[kt+0]-=10000; } } void lmulmt_herf(short int *a,short int *b,short int *out,int kta,int ktb,int rkt,double *acp,int limit) { int i,hlim,d,cy,shr,sis,ktachk,ktbchk,new_rkt,new_kta,new_ktb; short int *a1,*a2,*b1,*b2,*tmp,*t1,*t2; double *bcp; hlim=limit/2; shr=2; while(hlim*2+1>shr){ shr*=2; } bcp=acp+shr+4; tmp=(short int *)(bcp+shr+4); a1=a+2; a2=a1+hlim; b1=b+2; b2=b1+hlim; t1=tmp; t2=tmp+hlim; ktachk=0; ktbchk=0; new_rkt=hlim*2; if(kta>new_rkt) { new_kta=hlim; ktachk=1;} else new_kta=kta-hlim; if(ktb>new_rkt) { new_ktb=hlim; ktbchk=1;} else new_ktb=ktb-hlim; for(i=0;i=0;i--){ d=tmp[i]+cy; cy=d*0.0001; tmp[i]=d-cy*10000; if(cy==0) break; } sis=*((int *)a)+*((int *)b); if(tmp[0]!=0){ for(i=rkt-1;i>=0;i--){ out[i+2]=tmp[i]; } } else{ for(i=rkt-1;i>=0;i--){ out[i+2]=tmp[i+1]; } sis--; } *((int *)out)=sis; } void mul_harf_hosei(short int *a1,short int *a2,short int *b1,short int *b2,short int *tmp, int kta,int ktb,int rkt,int ktachk,int ktbchk) { int tmp1,tmp2,tmp3; long double gs; if(kta<2 && ktb<2){ gs=(long double)(a2[0]*10000)*(b2[0]*10000); } else if(kta<2){ gs=(long double)(a2[0]*10000)*(b2[0]*10000+b2[1]); } else if(ktb<2){ gs=(long double)(a2[0]*10000+a2[1])*(b2[0]*10000); } else{ gs=(long double)(a2[0]*10000+a2[1])*(b2[0]*10000+b2[1]); } tmp1=gs*0.000000000001; tmp2=(gs-(long double)tmp1*1000000000000)*0.00000001; tmp3=(gs-(long double)tmp1*1000000000000-(long double)tmp2*100000000)*0.0001; tmp[rkt ]+=tmp1; tmp[rkt+1]+=tmp2; tmp[rkt+2]+=tmp3; gs=0.0; if(ktbchk==1) gs+=(long double)(a1[0]*100000000.0+a1[1]*10000.0+a2[2])*b1[rkt]; if(ktachk==1) gs+=(long double)(b1[0]*100000000.0+b1[1]*10000.0+b2[2])*a1[rkt]; tmp1=gs*0.000000000001; tmp2=(gs-(long double)tmp1*1000000000000.0)*0.00000001; tmp3=(gs-(long double)tmp1*1000000000000.0-(long double)tmp2*100000000.0)*0.0001; tmp[rkt ]+=tmp1; tmp[rkt+1]+=tmp2; tmp[rkt+2]+=tmp3; tmp[rkt+1]+=tmp[rkt+2]/10000; tmp[rkt+2]%=10000; tmp[rkt ]+=tmp[rkt+1]/10000; tmp[rkt+1]%=10000; tmp[rkt-1]+=tmp[rkt ]/10000; tmp[rkt ]%=10000; } void mul_fft_dataput_type3(short int *c,int loc,int out_len,double *acp,int shr) { int i,start,deep; double scale,rgs; __int64 cy; scale=2.0/shr; start=shr-(loc+1); deep=3; while( deep > -1 ){ if( start + out_len + deep < shr+4 ) break; deep--; } cy=0; for(i=out_len+deep;i>out_len-1;i--){ rgs=acp[i+start]*scale+0.5+cy; cy=rgs*0.0001; } for(i=out_len-1;i>=0;i--){ rgs=acp[i+start]*scale+0.5+cy; cy=rgs*0.0001; c[i]=rgs-(double)cy*10000.0; } } void mul_fft_dataput_type2(short int *c,int loc,int out_len,double *acp,int shr) { int i,start,deep; double scale,rgs; __int64 cy; scale=2.0/shr; start=shr-(loc+1); deep=3; while( deep > -1 ){ if( start + out_len + deep < shr+4 ) break; deep--; } cy=0; for(i=out_len+deep;i>out_len-1;i--){ rgs=acp[i+start]*scale+0.5+cy; cy=rgs*0.0001; } for(i=out_len-1;i>=0;i--){ rgs=acp[i+start]*scale+0.5+cy+c[i]; cy=rgs*0.0001; c[i]=rgs-(double)cy*10000.0; } if((int)cy==1) c[0]+=10000; } void mul_fft_dataset(short int *a,int kta,double *acp,int shr) { int i,start; start=shr-kta; for(i=0;i