new; cls; let data[15,3]= 1.308 0.114 0.453 -9.475 -9.835 -8.722 2.237 0.169 0.277 9.452 9.111 10.902 -1.302 0.046 0.791 -9.667 -10.51 -10.910 -10.822 -11.12 -10.301 -9.819 -9.288 -10.471 10.503 9.094 9.051 0.543 -1.358 -0.226 -10.291 -10.33 -10.672 9.687 10.33 9.632 10.111 10.31 8.799 2.246 1.216 0.893 10.121 10.002 10.252 ; B=2000; call cluster0GAPu(data,B); proc(0)=cluster0GAPu(data,B); local n,label,D,i,j,x,xi,cxi,k,Dmin,is,js,e,gap,sd,gapk,sdk; n=rows(data); gap=zeros(n-1,1); sd=zeros(n-1,1); print "Nearest Neighbor Method:"; x=seqa(1,1,n); xi=ones(n,1); k=1; do while k<=n-2; Dmin=1e256; is=0; js=0; cxi=0|cumsumc(xi); i=1; do while i<=rows(xi)-1; j=i+1; do while j<=rows(xi); D=minD(data[x[cxi[i]+1:cxi[i+1],.],.],data[x[cxi[j]+1:cxi[j+1],.],.]); if Dmin>D; Dmin=D; is=i; js=j; endif; j=j+1; endo; i=i+1; endo; e=zeros(n,1); e[cxi[is]+1:cxi[is+1]]=ones(xi[is],1); e[cxi[js]+1:cxi[js+1]]=ones(xi[js],1); x=x[cxi[is]+1:cxi[is+1]]|x[cxi[js]+1:cxi[js+1]]|delif(x,e); e=zeros(rows(xi),1); e[is]=1; e[js]=1; xi=(xi[is]+xi[js])|delif(xi,e); {gapk,sdk}=Gapuni(data,x,xi,B); gap[k]=gapk; sd[k]=sdk; print "Dmin=" Dmin;; print/rz " between" is " &" js; print/lz "level" k;; call displayg(x,xi); k=k+1; endo; Dmin=minD(data[x[1:xi[1],.],.],data[x[xi[1]+1:n,.],.]); xi=xi[1]+xi[2]; {gapk,sdk}=Gapuni(data,x,xi,B); gap[n-1]=gapk; sd[n-1]=sdk; print "Dmin=" Dmin;; print/rz " between" 1 " &" 2; print/lz "level" k;; call displayg(x,xi); /* Gap statistic */ gap=rev(gap); sd=rev(sd); print; print "Gap statistic:"; print " k Gap[k] Gap[k+1]-sd[k+1]";; print/rz seqa(1,1,n-2)~gap[1:n-2]~(gap[2:n-1]-sd[2:n-1]); i=1; do while i<=n-2; if gap[i]>=gap[i+1]-sd[i+1]; print/lz "k*=" i; break; endif; i=i+1; endo; endp; proc(0)=displayg(x,xi); local k,cxi,i; k=rows(xi); cxi=0|cumsumc(xi); if maxc(x)<100; format/rz 2,2; else; format/rz 3,3; endif; i=1; do while i<=k-1; print x[cxi[i]+1:cxi[i+1]]';; print ",";; i=i+1; endo; print x[cxi[k]+1:cxi[k+1]]'; format /mb1 /ros 16,8; endp; proc minD(x1,x2); local n1,n2,i,j,Dmin,D; n1=rows(x1); n2=rows(x2); Dmin=1e256; j=1; do while j<=n2; i=1; do while i<=n1; D=sumc((x1[i,.]'-x2[j,.]')^2)^(1/2); if Dmin>D; Dmin=D; endif; i=i+1; endo; j=j+1; endo; retp(Dmin); endp; proc(2)=Gapuni(data,x,xi,B); local W,Wb,i,xb,gap,sd; W=calcSSw(data,x,xi); Wb=zeros(B,1); i=1; do while i<=B; xb=calcxb(x,xi); Wb[i]=calcSSw(data,xb,xi); i=i+1; endo; gap=1/B*sumc(ln(Wb))-ln(W); sd=stdc(ln(Wb))*sqrt(B-1)/sqrt(B)*sqrt(1+1/B); retp(gap,sd); endp; proc calcxb(x,xi); local n,k,index,xb,xr,i,j; n=rows(x); k=rows(xi); xb={}; j=1; do while j<=k; index=seqa(1,1,n); i=1; do while i<=xi[j]; xr=index[floor((n-i+1)*rndu(1,1))+1]; xb=xb|xr; index=delif(index,index.==xr); i=i+1; endo; j=j+1; endo; retp(xb); endp; proc calcSSw(data,x,xi); local n,k,cxi,i,mk,SSw; n=rows(x); k=rows(xi); cxi=0|cumsumc(xi); mk=zeros(k,cols(data)); i=1; do while i<=k; mk[i,.]=meanc(data[x[cxi[i]+1:cxi[i+1]],.])'; i=i+1; endo; SSw=0; i=1; do while i<=k; SSw=SSw+1/(xi[i]/meanc(xi))*sumc(sumc(((data[x[cxi[i]+1:cxi[i+1]],.]-mk[i,.])^2)')); i=i+1; endo; retp(SSw); endp;