Log in Help
Print
Home 〉 releases 〉 gate-5.1-beta2-build3402-ALL 〉 plugins 〉 Ontology_BDM_Computation 〉 src 〉 gate 〉 bdmComp 〉 BDMCompMain.java
 
/*
 *  BDMCompMain.java
 * 
 *  Yaoyong Li 15/03/2009
 *
 *  $Id: IaaMain.java, v 1.0 2009-03-15 12:58:16 +0000 yaoyong $
 */
package gate.bdmComp;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;

import gate.Factory;
import gate.FeatureMap;
import gate.Gate;
import gate.ProcessingResource;
import gate.creole.AbstractLanguageAnalyser;
import gate.creole.ExecutionException;
import gate.creole.ResourceInstantiationException;
import gate.creole.ontology.OClass;
import gate.creole.ontology.Ontology;

public class BDMCompMain extends AbstractLanguageAnalyser implements
ProcessingResource {
  /**
   * File name or URL storing the ontology. execute() passes it as the
   * "rdfXmlURL" parameter when it creates the ontology resource.
   */
  URL ontologyURL = null;
  /** The ontology used; created by execute() from ontologyURL. */
  Ontology ontologyUsed = null;
  /**
   * The file storing the BDM score. When it is null or empty, execute()
   * prints the scores to standard output instead.
   */
  URL outputBDMFile = null;
  /**
   * Store the BDM information for each pair of concepts. The set is created
   * by init() and filled by execute().
   */
  Set<BDMOne>bdmScores = null;
  
	/**
	 * Initialise this resource, and return it.
	 * <p>
	 * Adds the "Ontology_Tools" directory found under the GATE plugins home to
	 * the CREOLE register (execute() later creates an ontology resource whose
	 * class is expected to come from that plugin) and creates the empty set
	 * that will hold the BDM scores.
	 *
	 * @return this resource.
	 * @throws ResourceInstantiationException if the plugin directory cannot be
	 *           expressed as a URL.
	 */
	public gate.Resource init() throws ResourceInstantiationException {
	  File ontoHome = new File(Gate.getPluginsHome(), "Ontology_Tools");
	  try {
	    Gate.getCreoleRegister().addDirectory(ontoHome.toURI().toURL());
	  }
	  catch(MalformedURLException e) {
	    // Without the plugin directory the ontology cannot be created later on,
	    // so report the failure to the caller instead of only printing it.
	    throw new ResourceInstantiationException(e);
	  }
	  bdmScores = new HashSet<BDMOne>();
	  return this;
	} // init()
	
	/**
	 * Numeric id assigned to each ontology class. The map is filled by
	 * execute() and read by obtainAChain() to spell out chains of class ids.
	 */
	HashMap<OClass,Integer> concept2id= new HashMap<OClass,Integer>();
	/**
	 * Run the resource: load the ontology given by {@link #ontologyURL},
	 * compute the BDM score for every pair of its classes and write the scores
	 * to {@link #outputBDMFile}, or to standard output when no file is given.
	 * <p>
	 * When the resource runs over a corpus, only the call made for the first
	 * document does any work; the calls for all other documents return at once.
	 *
	 * @throws ExecutionException if no ontology URL is set, if the ontology
	 *           cannot be loaded, or if the result file cannot be written.
	 */
	public void execute() throws ExecutionException {
	  // The scores are computed from the ontology alone, so one computation per
	  // corpus run is enough: skip every document but the first one.
	  if(corpus != null && corpus.size() != 0 && corpus.indexOf(document) > 0) {
	    return;
	  }
	  boolean writeToFile = outputBDMFile != null
	    && outputBDMFile.toString().length() > 0;
	  if(!writeToFile) {
	    System.out.println("There is no file specified for storing the BDM scores!");
	  }
	  // Load the ontology before touching the result file, so that a failure
	  // here does not truncate a result file written by a previous run.
	  loadOntology();

	  // Forget whatever an earlier run left behind; otherwise ids of stale
	  // classes and scores of the previous ontology would be mixed in.
	  concept2id.clear();
	  if(bdmScores == null) {
	    bdmScores = new HashSet<BDMOne>();
	  } else {
	    bdmScores.clear();
	  }

	  // retrieving a list of top classes
	  Set<OClass> topClasses = ontologyUsed.getOClasses(true);
	  if(topClasses.size()>1) {
	    System.out.println("The ontology has "+topClasses.size() +" top classes!!");
	  }
	  // retrieving a list of all classes
	  Set<OClass> allConcepts = ontologyUsed.getOClasses(false);

	  // assign a number id to each class
	  HashMap<Integer,OClass> id2concept = new HashMap<Integer,OClass>();
	  int nextId = 1;
	  for(OClass concept : allConcepts) {
	    Integer id = Integer.valueOf(nextId);
	    concept2id.put(concept, id);
	    id2concept.put(id, concept);
	    ++nextId;
	  }
	  System.out.println("ontology "+ontologyUsed.getName()+", allConcepts="+allConcepts.size());

	  // for each concept, get the chain(s) from it to the top class
	  HashMap<OClass,String> concept2chain = new HashMap<OClass,String>();
	  for(OClass concept : allConcepts) {
	    concept2chain.put(concept, obtainAChain(concept, ""));
	  }

	  // Walk the chains of the leaf classes (classes without direct
	  // subclasses). For every class met on such a chain, add up the length of
	  // the chain and count the chain; n0BDM becomes the average chain length.
	  HashMap<OClass,Integer> chainLenSum = new HashMap<OClass,Integer>();
	  HashMap<OClass,Integer> chainCount = new HashMap<OClass,Integer>();
	  float n0BDM = 0.0f;
	  int numLeafChains = 0;
	  for(OClass concept : allConcepts) {
	    if(concept.getSubClasses(OClass.DIRECT_CLOSURE).size() != 0) {
	      continue;
	    }
	    String[] chains = concept2chain.get(concept).split(ConstantParameters.separater2);
	    for(int i = 0; i < chains.length; ++i) {
	      String[] ids = chains[i].split(ConstantParameters.separater1);
	      int len = ids.length - 1; // count the edges, not the nodes
	      n0BDM += len;
	      ++numLeafChains;
	      for(int j = 0; j < ids.length; ++j) {
	        OClass con = id2concept.get(Integer.valueOf(ids[j]));
	        Integer sumSoFar = chainLenSum.get(con);
	        chainLenSum.put(con, Integer.valueOf(
	          sumSoFar == null ? len : sumSoFar.intValue() + len));
	        Integer countSoFar = chainCount.get(con);
	        chainCount.put(con, Integer.valueOf(
	          countSoFar == null ? 1 : countSoFar.intValue() + 1));
	      }
	    }
	  }
	  if(numLeafChains > 1) n0BDM /= numLeafChains;

	  // compute the average chain length for each concept
	  HashMap<OClass,Float> con2ChainLen = new HashMap<OClass,Float>();
	  for(OClass con : chainLenSum.keySet()) {
	    float averLen = chainLenSum.get(con).intValue();
	    averLen /= chainCount.get(con).intValue();
	    con2ChainLen.put(con, Float.valueOf(averLen));
	  }

	  // compute the number of branches (direct subclasses) for each concept
	  // that has any, and the average over those concepts
	  HashMap<OClass,Integer> concept2branch = new HashMap<OClass,Integer>();
	  float averBran = 0.0f;
	  int numBranching = 0;
	  for(OClass concept : allConcepts) {
	    int branches = concept.getSubClasses(OClass.DIRECT_CLOSURE).size();
	    if(branches > 0) {
	      concept2branch.put(concept, Integer.valueOf(branches));
	      averBran += branches;
	      ++numBranching;
	    }
	  }
	  if(numBranching > 1) averBran /= numBranching;

	  // Now compute the BDM for each pair of concepts
	  for(OClass curCon11 : allConcepts) {
	    int id11 = concept2id.get(curCon11).intValue();
	    String[] chainS11 = concept2chain.get(curCon11).split(ConstantParameters.separater2);
	    for(OClass curCon22 : allConcepts) {
	      int id22 = concept2id.get(curCon22).intValue();
	      // every unordered pair is scored exactly once
	      if(id11 < id22) continue;
	      BDMOne bdmS = new BDMOne(curCon11, curCon22);
	      if(id11 == id22) {
	        // a concept compared with itself: full score, its own MSCA
	        bdmS.setValues(1.0f, shortestChainEdges(chainS11), 0, 0, n0BDM, 1, 1, 1);
	        bdmS.setMsca(curCon11);
	        bdmScores.add(bdmS);
	        continue;
	      }
	      String[] chainS22 = concept2chain.get(curCon22).split(ConstantParameters.separater2);
	      float m1 = con2ChainLen.get(curCon11).floatValue();
	      float m2 = con2ChainLen.get(curCon22).floatValue();
	      // becomes true once some pair of chains shares a common part
	      boolean connected = false;
	      for(int iS11 = 0; iS11 < chainS11.length; ++iS11) {
	        String[] chain11 = chainS11[iS11].split(ConstantParameters.separater1);
	        int len11 = chain11.length;
	        for(int iS22 = 0; iS22 < chainS22.length; ++iS22) {
	          String[] chain22 = chainS22[iS22].split(ConstantParameters.separater1);
	          int len22 = chain22.length;
	          // determine the common part of the two chains
	          int cp = countSharedTail(chain11, chain22);
	          if(cp == 0) {
	            // These two chains are not in the same connected part of the
	            // ontology. Record that only as long as no connected pair of
	            // chains was found, so that a better result is never wiped out.
	            if(!connected) {
	              bdmS.setValues(0, -1, len11-1, len22-1, n0BDM, m1, m2, 1.0f);
	            }
	            continue;
	          }
	          // the first node of the common part is the common concept
	          OClass commonCon = id2concept.get(Integer.valueOf(chain11[len11-cp]));
	          cp -= 1; // count the edges, not the nodes
	          int dpk = len11-1-cp;
	          int dpr = len22-1-cp;
	          // compute the averaged branch in the chain from key to response:
	          // the common concept plus the nodes strictly between it and each
	          // of the two concepts
	          int num11 = 1;
	          float bran = 0.0f;
	          if(concept2branch.containsKey(commonCon))
	            bran += concept2branch.get(commonCon).intValue();
	          for(int i = 1; i < len11-cp-1; ++i) {
	            OClass con = id2concept.get(Integer.valueOf(chain11[i]));
	            if(concept2branch.containsKey(con))
	              bran += concept2branch.get(con).intValue();
	            ++num11;
	          }
	          for(int i = 1; i < len22-cp-1; ++i) {
	            OClass con = id2concept.get(Integer.valueOf(chain22[i]));
	            if(concept2branch.containsKey(con))
	              bran += concept2branch.get(con).intValue();
	            ++num11;
	          }
	          if(num11 > 1) bran /= num11;
	          bran /= averBran;
	          // compute the bdm score for the two chains
	          float bdm = bran*cp/n0BDM;
	          bdm = bdm/(bdm+dpk/m1 + dpr/m2);
	          boolean improved = bdm > bdmS.bdmScore;
	          if(improved) {
	            bdmS.setValues(bdm, cp, dpk, dpr, n0BDM, m1, m2, bran);
	          }
	          // Keep the MSCA in step with the stored values: it is replaced
	          // only by a better scoring pair of chains, and set at the latest
	          // by the first connected pair so that it is never left unset.
	          if(improved || !connected) {
	            bdmS.setMsca(commonCon);
	          }
	          connected = true;
	        }
	      } //end of the loop for the chains of the two concepts
	      bdmScores.add(bdmS);
	    }//end of the loop for the second concept
	  }//end of the loop for the first concept

	  // write the results into a file or console
	  writeResults(writeToFile);
	}

	/** Creates the ontology resource from ontologyURL and stores it in ontologyUsed. */
	private void loadOntology() throws ExecutionException {
	  // compare by content: "==" on strings only compares references
	  if(ontologyURL == null || ontologyURL.toString().trim().length() == 0) {
	    throw new ExecutionException("No ontology: neither using a loaded ontology nor giving the ontology URL!");
	  }
	  FeatureMap fm = Factory.newFeatureMap();
	  fm.put("rdfXmlURL", ontologyURL);
	  try {
	    ontologyUsed = (Ontology)Factory.createResource("gate.creole.ontology.owlim.OWLIMOntologyLR", fm);
	  }
	  catch(ResourceInstantiationException e) {
	    // nothing can be computed without the ontology
	    throw new ExecutionException(e);
	  }
	}

	/** Writes a header and all scores to outputBDMFile (UTF-8), or only the scores to the console. */
	private void writeResults(boolean toFile) throws ExecutionException {
	  if(!toFile) {
	    for(BDMOne oneb : bdmScores) {
	      System.out.println(oneb.printResult());
	    }
	    return;
	  }
	  BufferedWriter bdmResultsWriter = null;
	  try {
	    bdmResultsWriter = new BufferedWriter(new OutputStreamWriter(
	      new FileOutputStream(new File(outputBDMFile.toURI())), "UTF-8"));
	    // write the header of the bdm score file
	    bdmResultsWriter.append("##The following are the BDM scores for ");
	    bdmResultsWriter.append("each pair of concepts in the ontology named "+ontologyUsed.getName()+".\n");
	    for(BDMOne oneb : bdmScores) {
	      bdmResultsWriter.append(oneb.printResult()+"\n");
	    }
	    bdmResultsWriter.flush();
	  }
	  catch(URISyntaxException e) {
	    throw new ExecutionException(e);
	  }
	  catch(IOException e) {
	    throw new ExecutionException(e);
	  }
	  finally {
	    if(bdmResultsWriter != null) {
	      try {
	        bdmResultsWriter.close();
	      }
	      catch(IOException ignored) {
	        // Either the data was already flushed successfully, or the primary
	        // failure is already on its way to the caller.
	      }
	    }
	  }
	}

	/** Returns the number of edges of the shortest of the given chains. */
	private static int shortestChainEdges(String[] chains) {
	  int nodes = Integer.MAX_VALUE;
	  for(int i = 0; i < chains.length; ++i) {
	    int len = chains[i].split(ConstantParameters.separater1).length;
	    if(nodes > len) nodes = len;
	  }
	  return nodes - 1;
	}

	/** Returns how many trailing elements (counted from the top-class end) two chains share. */
	private static int countSharedTail(String[] chainA, String[] chainB) {
	  int limit = Math.min(chainA.length, chainB.length);
	  int shared = 0;
	  while(shared < limit
	    && chainA[chainA.length-1-shared].equals(chainB[chainB.length-1-shared])) {
	    ++shared;
	  }
	  return shared;
	}
	/**
	 * Recursively spells out every chain of class ids leading from a class up
	 * to a top class.
	 * <p>
	 * Within one chain the ids are joined by ConstantParameters.separater1,
	 * starting with the id the recursion began at; every finished chain is
	 * terminated by ConstantParameters.separater2. A class with several direct
	 * superclasses yields one chain per route, all concatenated.
	 *
	 * @param curCon the class to continue the chain from.
	 * @param chainSofar the part of the chain collected below curCon.
	 * @return all chains running through curCon, or an empty string when
	 *         curCon is not a top class and has no direct superclass.
	 */
	String obtainAChain(OClass curCon, String chainSofar) {
	  String pathToHere = chainSofar + concept2id.get(curCon).toString();
	  // a top class closes the chain
	  if(curCon.isTopClass()) {
	    return pathToHere + ConstantParameters.separater2;
	  }
	  Set<OClass> parents = curCon.getSuperClasses(OClass.DIRECT_CLOSURE);
	  String prefixForParents = pathToHere + ConstantParameters.separater1;
	  StringBuilder collected = new StringBuilder();
	  for(OClass parent : parents) {
	    // look the superclass up again by its name before going on with it
	    OClass resolved = (OClass) ontologyUsed.getOResourceByName(parent.getName());
	    collected.append(obtainAChain(resolved, prefixForParents));
	  }
	  return collected.toString();
	}
	
	/**
	 * Sets the URL of the ontology to compute the BDM scores for.
	 *
	 * @param ontoU the ontology URL.
	 */
	public void setOntologyURL(URL ontoU) {
	  ontologyURL = ontoU;
	}

  /**
   * Returns the URL of the ontology to compute the BDM scores for.
   *
   * @return the ontology URL, possibly null.
   */
  public URL getOntologyURL() {
    return ontologyURL;
  }
  /**
   * Sets the file the BDM scores are written to.
   *
   * @param ontoU the URL of the result file.
   */
  public void setOutputBDMFile(URL ontoU) {
    outputBDMFile = ontoU;
  }

  /**
   * Returns the file the BDM scores are written to.
   *
   * @return the URL of the result file, possibly null.
   */
  public URL getOutputBDMFile() {
    return outputBDMFile;
  }
  
  //public void setOntology(Ontology onto) {
    //this.ontologyUsed = onto;
  //}

  //public Ontology getOntology() {
   // return this.ontologyUsed;
  //}
}