/** * Title: Project of visualizing codon frequencies * Description: * Copyright: Copyright (c) 2002 * Company: IHES, France * @author Andrey Zinovyev, Alessandra Carbone * @version 1.0 */ import java.io.*; import java.util.*; import java.lang.Integer; import java.lang.System; import org.biojava.bio.*; import org.biojava.bio.symbol.*; import org.biojava.bio.seq.*; import org.biojava.bio.seq.io.*; import Utils; public class CAIJava{ static int taskModifier = 0; static String addFeature = null; static boolean calcGCContent = false; static boolean fastaFormat = false; static int calcCodonAnalysis = 0; static StringBuffer AdditionalInfoFile = new StringBuffer(""); static float GTFAverLen[] = new float[64]; static Vector TranslationalTable; static String externalWValues = null; static boolean checkForStability = false; static int classNumber = -1; public static void main(String [] args) { try { if(args.length == 0) { throw new Exception("Use: CalcCodonFreq GenBankFile"); } // test /*float tt[] = new float[11]; tt[0]=(float)0.5; tt[1]=(float)0.1; tt[2]=(float)0.1; tt[3]=(float)0.3; tt[4]=(float)0.2; tt[5]=(float)0.4; tt[6]=(float)0.7; tt[7]=(float)0.9; tt[8]=(float)0.65; tt[9]=(float)0.05; tt[10]=(float)0.08; int nn[] = SortCais(tt); for (int i = 0; i < nn.length; i++) System.out.print(nn[i]+" "); System.out.println(); for (int i = 0; i < nn.length; i++) System.out.print(tt[nn[i]]+" "); System.out.println();*/ /*StringBuffer s = new StringBuffer(); for (int i = 0; i < 64; i++) { s.append(Utils.TripletName(i)); } calcAminoacidFreq(s.toString(),1,s.length());*/ Vector Acids = new Vector(); for (int i = 0; i < 64; i++){ SymbolList sDNA = DNATools.createDNA(Utils.TripletName(i)); SymbolList Amin = RNATools.translate(RNATools.transcribe(sDNA)); String sss = new String(Amin.seqString()); if ((!Acids.contains(sss))&&(!sss.equals("*"))) Acids.addElement(sss); } TranslationalTable = new Vector(); for(int i=0;i<64;i++){ SymbolList sDNA = DNATools.createDNA(Utils.TripletName(i)); SymbolList Amin = RNATools.translate(RNATools.transcribe(sDNA)); if (!Amin.seqString().equals("*")) { int n = Acids.indexOf(Amin.seqString()); TranslationalTable.addElement(new Integer(n)); } else TranslationalTable.addElement(new Integer(-1)); } int CodonsOrder[] = orderCodonUsageByAminoacid(Acids); Vector CodonUsages = new Vector(); int windowSize = 120; int evPosition = 21, sAnnotbeg = -1, sAnnotend = -1; int numberOfIterations = 0; int maximumGeneLength = -1; int BayezRatio = 0; String fileName = ""; for(int i=1;i<=args.length;i++) if (i1) if(args[i].equals("-ew")) { externalWValues = args[i+1]; continue; } if ((args[i].length()>2)&&(args[i].substring(0,3).equals("-cs"))) {checkForStability = true; if (args[i].length()>3) classNumber = Integer.parseInt(args[i].substring(3));; continue; } if(args[i].equals("-f")) fileName = new String(args[i+1]); if(args[i].equals("-i")) numberOfIterations = Integer.parseInt(args[i+1]); if(args[i].equals("-k")) taskModifier = Integer.parseInt(args[i+1]); if(args[i].equals("-a")) AdditionalInfoFile = new StringBuffer(args[i+1]); if(args[i].equals("-m")) maximumGeneLength = Integer.parseInt(args[i+1]); if(args[i].equals("-c")) calcCodonAnalysis = Integer.parseInt(args[i+1]); if(args[i].equals("-t")) addFeature = new String(args[i+1]);; if(args[i].equals("-s")) fastaFormat = true; if(args[i].equals("-g")) calcGCContent = true; } // Prepare gene file HashMap GeneAddInfo = new HashMap(); String StringDelimiters = new String("\t"); int GeneAddInfoCount=0; System.out.println("AdditionalInfoFile = \""+AdditionalInfoFile+"\""); if (!AdditionalInfoFile.toString().equals("")){ System.out.println("Trying to read..."); LineNumberReader lr = new LineNumberReader(new FileReader(AdditionalInfoFile.toString())); String s; s = lr.readLine(); lr.close(); lr = new LineNumberReader(new FileReader(AdditionalInfoFile.toString())); StringTokenizer st = new StringTokenizer(s,StringDelimiters); int j=0; while ((st.hasMoreTokens())) {st.nextToken(); j++; } GeneAddInfoCount = j; //lr.reset(); while ( ((s=lr.readLine()) != null)) { st = new StringTokenizer(s,StringDelimiters); //System.out.println("AddInfofile: "+s.substring(1,10)+"\t"+st.countTokens()); String[] sInfo = new String[j]; int k = 0; while ((st.hasMoreTokens())) { String sss = st.nextToken(); if((k==1)&&(sss.length()==1)) sss="0"+sss; if(k10)) { numCDSFeat++; try{ String ss = new String(Ft.getSymbols().seqString()); if(maximumGeneLength!=-1) if(ss.length()>maximumGeneLength) continue; TF = Utils.calcTripletFreq(ss,1,ss.length()); // k=CalcCodonUsageForSeq(Ft.getSymbols(),fr0,fr1,fr2); //ss = new String(Ft.getSymbols().seqString()); //if (ss.length()/3.0!=(int)(ss.length()/3.0)) // System.out.println("Mistake!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"); //System.out.println(Ft.getAnnotation().getProperty(new String("gene")).toString()+"; "+ss.length()+" symbols"); //String check = new String(Ft.getAnnotation().getProperty("gene").toString()); //GeneticCodes GCode = new GeneticCodes(); //System.out.println(GCode.translate(GCode.transcribe(Ft.getSymbols())).seqString()); //System.out.println(ss); for(int j=0;j<64;j++) GTF[j]+=TF[j]; NumberOfPoints++; ListOfGenes.addElement(Ft); float CAIV[] = new float[numberOfIterations]; CAIValues.addElement(CAIV); } catch (Exception e) {System.out.print("\n"+e+" : "); System.out.println(Ft);} } if ((Ft.getType().equals("gene"))) { if((Ft.getAnnotation().containsProperty("note"))&&(Ft.getAnnotation().containsProperty("gene"))){ String geneN = Ft.getAnnotation().getProperty("gene").toString(); String geneNote = Ft.getAnnotation().getProperty("note").toString(); //System.out.println(geneN+"\t"+geneNote); GeneNoteTable.put(geneN,geneNote); } } } System.out.println(numCDSFeat+" CDS features found"); } System.out.println("Gene Note table: "); Set se = GeneNoteTable.keySet(); for(Iterator i=se.iterator();i.hasNext();) { String ss = (String)i.next(); System.out.println(GeneNoteTable.get(ss)); } if(fastaFormat) while(seqI.hasNext()) { Sequence seq = seqI.nextSequence(); System.out.println("Got"+seq.getName()); String ss = seq.seqString(); if(ss.indexOf("N")==-1){ if(maximumGeneLength!=-1) if(ss.length()>maximumGeneLength) continue; TF = Utils.calcTripletFreq(ss,1,ss.length()); for(int j=0;j<64;j++) GTF[j]+=TF[j]; NumberOfPoints++; ListOfGenes.addElement(seq); float CAIV[] = new float[numberOfIterations]; CAIValues.addElement(CAIV); } } int sum = 0; for(int i=0;i<64;i++) sum+=GTF[i]; for(int i=0;i<64;i++) GTF[i]=GTF[i]/sum; System.out.print("CodonUsage"+" sphere "); for(int ii=0;ii<64;ii++) System.out.print(GTF[ii]+" "); System.out.println("0.4 255 0 0"); float GTFC[] = new float[64]; System.arraycopy(GTF,0,GTFC,0,64); CodonUsages.addElement(GTFC); Utils.printDistribution(GTF); float GTFAver[] = new float[64]; for (int i = 0; i < 64; i++) GTFAver[i]=GTF[i]; GenerateCAIWValuesTable(GTF,"UNIVERSAL"); GenerateCAIWValuesTableMethodAverage(GTFAver,"UNIVERSAL"); GenerateCAIWValuesSuppressingRareCodons(GTFAver,"UNIVERSAL",ListOfGenes); System.out.println(); //Utils.printDistribution(GTF); //Utils.printDistribution(GTF,"t","c","g","t"); System.out.println(); //Utils.printDistribution(GTFAver); System.out.println("Number of points = "+NumberOfPoints); // --- Calculating all iterations Random r = new Random(); float FirstCAI[] = new float[ListOfGenes.size()]; float first_p=100.0f; float cais[] = new float[ListOfGenes.size()]; for (int i = 0; i < numberOfIterations; i++) { if(i==0){ for (int j = 0; j < ListOfGenes.size(); j++) { if(!fastaFormat){ if(!checkForStability) cais[j]=CalculateCAIValue(((Feature)ListOfGenes.elementAt(j)).getSymbols().seqString(),GTF); else { cais[j]=r.nextFloat(); first_p=1f; if(classNumber!=-1){ String name = "**"; try{ name = ((Feature)ListOfGenes.elementAt(j)).getAnnotation().getProperty("gene").toString(); }catch(Exception e){}; cais[j]=0; for(int iii=0;iii100) af = af.substring(0,100); af = " \""+af+"\" "; sOut.append(af); } if(calcGCContent){ //SO = new Date(); float gc = Utils.GCContent(ss); float gc3 = Utils.GCContent3(ss); //EO = new Date(); //System.out.println("GC Cont: "+(EO.getTime()-SO.getTime())); sOut.append(" "+gc+" "+gc3+" "); } if(externalWValues!=null) { float caivext = CalculateCAIValue(ss,extWValues); sOut.append(caivext+" "); } sOut.append(caiv+" "); sOut.append(caivclassic+" "); sOut.append(caiv1+" "); sOut.append(caiv2+" "); sOut.append(ss.length()+" "); for (int ii = 0; ii < numberOfIterations; ii++) { float mas[] = (float[])CAIValues.elementAt(i); sOut.append(mas[ii]+" "); } String geneName = new String("Unknown"); String geneNote = new String("Unknown"); if (Ft.getAnnotation().containsProperty("gene")) geneName = new String(Ft.getAnnotation().getProperty(new String("gene")).toString()); // if (Ft.getAnnotation().containsProperty("note")) // { // geneNote = new String(Ft.getAnnotation().getProperty(new String("note")).toString()); // System.out.println(geneName+" "+geneNote); // } if(GeneNoteTable.containsKey(geneName)) geneNote = (String)(GeneNoteTable.get(geneName)); else geneNote = new String(""); if ((GeneAddInfo.containsKey(geneName))||(GeneAddInfo.containsKey(geneNote))){ String sm[]; if (GeneAddInfo.containsKey(geneName)) sm = (String [])(GeneAddInfo.get(geneName)); else{ sm = (String [])(GeneAddInfo.get(geneNote)); //System.out.println("!!!"+geneNote); } //GeneAddInfo.get() for (int ii = 1; ii < GeneAddInfoCount; ii++) { //System.out.println(sm[ii].substring(1,2)+"//"+sm[ii].substring(0,1)); if(sm[ii]==null) {sOut.append("\" \""); continue; } if((sm[ii].length()!=0)&&(sm[ii].substring(0,1).equals("\""))) sOut.append(sm[ii]+" "); else sOut.append("\""+sm[ii]+"\" "); }} else for (int ii = 1; ii < GeneAddInfoCount; ii++) sOut.append("\"\" "); fout.println(sOut.toString()); //Date EO1 = new Date(); //System.out.println("Total("+ss.length()+"): "+(EO1.getTime()-SO1.getTime())); } catch (Exception e) {/**System.out.print("\n"+e+" : "); System.out.println(Ft);**/ System.out.println(sOut.toString()); e.printStackTrace();} } } // } } catch (Throwable t) { t.printStackTrace(); System.exit(1); } } public static void GenerateCAIWValuesTable(float WW[], String CodeName){ TranslationTable TranTable = RNATools.getGeneticCode(CodeName); Alphabet Codons = TranTable.getSourceAlphabet(); HashMap Mp = new HashMap(); try{ for(int i=0;i<64;i++){ SymbolList sDNA = DNATools.createDNA(Utils.TripletName(i)); //System.out.println(sDNA.seqString()); Symbol sb = ((FiniteAlphabet)Codons).getSymbol((RNATools.transcribe(sDNA)).toList()); //System.out.println(sb.toString()+" => "+TranTable.translate(sb)); float f=-1; if (Mp.containsKey(TranTable.translate(sb))) f = ((Float)Mp.get(TranTable.translate(sb))).floatValue(); if (WW[i]>f) Mp.put(TranTable.translate(sb),new Float(WW[i])); } for(int i=0;i<64;i++){ SymbolList sDNA = DNATools.createDNA(Utils.TripletName(i)); Symbol sb = ((FiniteAlphabet)Codons).getSymbol((RNATools.transcribe(sDNA)).toList()); if (((Float)Mp.get(TranTable.translate(sb))).floatValue()!=0) WW[i]/=((Float)Mp.get(TranTable.translate(sb))).floatValue(); else WW[i]=0; } }catch(Exception e){ System.out.println(e); } } public static void GenerateCAIWValuesTableRegardingGeneLength(float WW[], String CodeName,Vector V){ try{ float WWW[][] = new float[V.size()][64]; for(int i=0;i "+TranTable.translate(sb)); float f=0; int nn=0; if (Mp.containsKey(TranTable.translate(sb))) f = ((Float)Mp.get(TranTable.translate(sb))).floatValue(); if (MpN.containsKey(TranTable.translate(sb))) nn = ((Integer)MpN.get(TranTable.translate(sb))).intValue(); Mp.put(TranTable.translate(sb),new Float(WW[i]+f)); MpN.put(TranTable.translate(sb),new Integer(nn+1)); // System.out.println(sb.getName()+" "+(WW[i]+f)+" "+(nn+1)); } for(int i=0;i<64;i++){ SymbolList sDNA = DNATools.createDNA(Utils.TripletName(i)); Symbol sb = ((FiniteAlphabet)Codons).getSymbol((RNATools.transcribe(sDNA)).toList()); if (((Float)Mp.get(TranTable.translate(sb))).floatValue()!=0) WW[i]/=((Float)Mp.get(TranTable.translate(sb))).floatValue(); else WW[i]=0; } }catch(Exception e){ System.out.println(e); } } public static int[] calcAminoacidFreq(String s, Vector Acids, int st, int en){ int[] Res = new int[20]; try{ String ss = s.substring(st-1,en-1); int len = ss.length(); SymbolList Tripl; for(int i=0; i 0); return res; } }