SAS code. This SAS code was written to compare UniGene cluster identification numbers obtained from microarray studies against the UniGene database. If you have SAS available, you can copy and paste this code into your SAS program window. The input files can be obtained by following these links: testis.txt, UniGene.txt

/***this inputs the "testis.txt" data and defines a dummy variable "dum". [path] defines the directory path where the input file is saved***/

data one;
    /***read one numeric value per iteration from the testis list into clusid***/
    infile '[path]testis.txt';
    input clusid;
    /***subsetting IF: only observations with a non-missing clusid continue and are written out***/
    if clusid ne .;
    /***dum is a copy of clusid, used later as the sort and merge key***/
    dum = clusid;
run;


/***this command sorts the data by the dummy variable and removes any duplicate cluster identification numbers***/


/***order data set one by dum, keeping a single observation per dum value; the sorted result replaces data set one***/
proc sort nodupkey data=one out=one;
    by dum;
run;

/***this inputs the "UniGene.txt" data and defines a dummy variable "dum"***/

data two;
    /***read one numeric value per iteration from the UniGene list into clusid2***/
    infile '[path]UniGene.txt';
    input clusid2;
    /***subsetting IF: only observations with a non-missing clusid2 continue and are written out***/
    if clusid2 ne .;
    /***dum is a copy of clusid2, used later as the sort and merge key***/
    dum = clusid2;
run;

/***this command sorts the data by the dummy variable and removes any duplicate cluster identification numbers***/

/***order data set two by dum, keeping a single observation per dum value; the sorted result replaces data set two***/
proc sort nodupkey data=two out=two;
    by dum;
run;

/***this command combines the testis and UniGene data sets and compares their cluster identification numbers, keeping only those that match***/

/***comb holds the merged rows whose two identifiers agree; dum is left out of the output data set***/
data comb (drop=dum);
    merge one two;
    by dum;
    /***discard any row where the two identifiers differ (for example, a dum value present in only one data set leaves the other identifier missing)***/
    if clusid ne clusid2 then delete;
run;


/***This command prints the cluster identification numbers that match between the microarray and UniGene data sets in two columns.  The first column contains the cluster identification numbers from the testis data (clusid), the second those from the UniGene database (clusid2). ***/

proc print data=comb;
run;


/***this command combines the testis and UniGene data sets and compares their cluster identification numbers, keeping only those that do not match***/


/***comb is rebuilt here to hold the merged rows whose two identifiers disagree; dum is left out of the output data set***/
data comb (drop=dum);
    merge one two;
    by dum;
    /***subsetting IF: keep a row only when the two identifiers differ (for example, a dum value present in only one data set leaves the other identifier missing)***/
    if clusid ne clusid2;
run;


/***This command prints the cluster identification numbers that do not match between the microarray and UniGene data sets in two columns.  The first column contains the cluster identification numbers from the testis data (clusid), the second those from the UniGene database (clusid2). ***/


proc print data=comb;
run;



|home|protamines|nuclear matrix|RNA in sperm|bioinformatics|SAK|RPM|publications|protocols|journal club|links|

last updated 2002.11.24  | maintained by RPM
optimized for IE (5+) browsers, 1024X768 resolution