/*---------------------------------------------------------------------------
  Macro Clustering

  Imports a tab-delimited file, runs average-linkage hierarchical clustering
  on it, exports the per-level clustering statistics, picks a number of
  clusters from those statistics, and exports the cluster membership.

  Parameters (all keyword):
    inputfile          path of the tab-delimited input file; variable names
                       are read from the first row, data start on row 2
    outputfilestats    path of the tab-delimited file receiving the kept
                       statistics (_ncl_, _psf_, _pst2_, _ccc_)
    outputfileclusters path of the tab-delimited file receiving the PROC TREE
                       output (one row per object with its CLUSTER)
    id                 name of the ID variable passed to PROC CLUSTER and
                       PROC TREE
    samplesize         rows of the tree data set whose _ncl_ equals this value
                       are dropped, and 30 percent of it (rounded) caps the
                       number of clusters considered
                       NOTE(review): presumably the number of observations
                       in the input file - confirm against the data
    pagetitle          text used in the TITLE statement

  Selection rule:
    Among levels with _ncl_ <= round(0.3 * samplesize), take the level with
    the largest _ccc_ and the level with the largest _psf_. The CCC level is
    the default. If the two differ, the PSF level is used instead when the
    _pst2_ value on the row preceding the PSF level exceeds the _pst2_ value
    on the row preceding the CCC level.
---------------------------------------------------------------------------*/
%macro Clustering(inputfile=, outputfilestats=, outputfileclusters=, id=, samplesize=, pagetitle=);

    /* Keep the chosen cluster count private to this macro so that CALL
       SYMPUT cannot overwrite a same-named variable in an outer scope. */
    %local clust;

    %* reading the data;
    proc import out=WORK.True datafile="&inputfile" dbms=tab replace;
        getnames=YES;
        datarow=2;
    run;

    %* clustering;
    proc cluster data=True noeigen method=average CCC pseudo outtree=tree noprint;
        id &id;
        title "&pagetitle";
    run;

    %* collecting the statistics used to choose the number of clusters;
    data true1;
        set tree;
        keep _ncl_ _ccc_ _psf_ _pst2_;
        if _ncl_ = &samplesize then delete;
    run;

    proc sort data=true1;
        by _ncl_;
    run;

    proc print data=true1;
    run;

    proc export data=true1 outfile="&outputfilestats" dbms=tab replace;
    run;

    %* choosing the number of clusters;
    proc iml;
        reset noprint;
        percent = round(0.3 * &samplesize);
        use true1 where (_ncl_ <= percent);
        /* Name the variables explicitly so the column positions used below
           (1 = _ncl_, 2 = _psf_, 3 = _pst2_, 4 = _ccc_) do not depend on the
           physical variable order of true1. */
        read all var {_ncl_ _psf_ _pst2_ _ccc_} into stats;
        close true1;

        if nrow(stats) > 0 then do;
            /* <:> returns the ROW position of the column maximum. */
            psf_row = stats[<:>, 2];
            ccc_row = stats[<:>, 4];

            /* Report the _ncl_ value stored on the selected row rather than
               the row position itself; the two agree only when the rows are
               exactly _ncl_ = 1, 2, 3, ... without gaps. */
            call symput('clust', char(stats[ccc_row, 1]));

            /* The tie-break looks at the row before each candidate. When a
               candidate is the first row there is no such row, and indexing
               row 0 would abort IML, so the CCC choice is kept in that case.
               The subscripts sit in the nested IF so they are evaluated only
               after the range check has passed. */
            if psf_row ^= ccc_row & psf_row > 1 & ccc_row > 1 then
                if stats[psf_row - 1, 3] > stats[ccc_row - 1, 3] then
                    call symput('clust', char(stats[psf_row, 1]));
        end;
    quit;

    /* No level satisfied _ncl_ <= round(0.3 * samplesize): stop here with a
       clear message instead of failing inside PROC TREE. */
    %if %length(&clust) = 0 %then %do;
        %put ERROR: Clustering found no cluster levels to evaluate. Check samplesize=&samplesize..;
        %return;
    %end;

    %* identifying and outputting the clusters;
    proc tree data=tree out=treeout nclusters=&clust;
        id &id;
    run;

    proc sort data=treeout;
        by CLUSTER;
    run;

    proc print data=treeout;
        var &id CLUSTER;
    run;

    proc export data=treeout outfile="&outputfileclusters" dbms=tab replace;
    run;

%mend Clustering;

/* Run settings: input file, the two output files, and the page title. */
%let source = C:\Documents and Settings\Zaid Abdo\My Documents\Main Storage\Research\Larry\Methods Paper\Cleaned Data\IndivPeaksraw1bp.txt;
%let dist1 = C:\Documents and Settings\Zaid Abdo\My Documents\Main Storage\Research\Larry\Methods Paper\Results\Statistics.txt;
%let dist2 = C:\Documents and Settings\Zaid Abdo\My Documents\Main Storage\Research\Larry\Methods Paper\Results\Clusters.txt;
%let title = Peak Data Raw 1 bp;

%Clustering(inputfile=&source, outputfilestats=&dist1, outputfileclusters=&dist2, id=length, samplesize=65, pagetitle=&title);