/*
* BDMCompMain.java
*
* Yaoyong Li 15/03/2009
*
* $Id: IaaMain.java, v 1.0 2009-03-15 12:58:16 +0000 yaoyong $
*/
package gate.bdmComp;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
import gate.Factory;
import gate.FeatureMap;
import gate.Gate;
import gate.ProcessingResource;
import gate.creole.AbstractLanguageAnalyser;
import gate.creole.ExecutionException;
import gate.creole.ResourceInstantiationException;
import gate.creole.ontology.OClass;
import gate.creole.ontology.Ontology;
public class BDMCompMain extends AbstractLanguageAnalyser implements
ProcessingResource {
/**
 * URL of the ontology to load. It is passed as the "rdfXmlURL" parameter
 * when the ontology resource is created in execute().
 */
URL ontologyURL = null;
/** The ontology used; assigned in execute() from ontologyURL. */
Ontology ontologyUsed = null;
/**
 * URL of the file the BDM scores are written to (as UTF-8). When it is null
 * or empty, execute() prints the scores to standard output instead.
 */
URL outputBDMFile = null;
/**
 * The BDM information for each pair of concepts. The set is created in
 * init() and filled by execute().
 */
Set<BDMOne>bdmScores = null;
/**
 * Initialise this resource, and return it.
 *
 * Registers the "Ontology_Tools" plugin directory (located under the GATE
 * plugins home) with the CREOLE register and creates the empty set that
 * execute() fills with BDM scores.
 *
 * @return this resource
 * @throws ResourceInstantiationException if the plugin directory cannot be
 *           turned into a URL; previously this was only printed, leaving a
 *           resource that looked initialised but had no ontology plugin
 *           registered.
 */
public gate.Resource init() throws ResourceInstantiationException {
  // load the Ontology_Tools plugin
  File ontoHome = new File(Gate.getPluginsHome(), "Ontology_Tools");
  try {
    Gate.getCreoleRegister().addDirectory(ontoHome.toURI().toURL());
  }
  catch(MalformedURLException e) {
    // Do not continue half-initialised: report the failure to the caller.
    throw new ResourceInstantiationException(e);
  }
  bdmScores = new HashSet<BDMOne>();
  return this;
} // init()
/**
 * Maps each ontology class to the integer id assigned to it in execute().
 * obtainAChain() reads it to build the id chains.
 */
HashMap<OClass,Integer> concept2id= new HashMap<OClass,Integer>();
/**
 * Run the resource: load the ontology at {@link #ontologyURL}, compute a BDM
 * score for every pair of its classes, and write one line per pair to
 * {@link #outputBDMFile} (UTF-8), or to standard output when no output file
 * is configured.
 *
 * When the resource runs over a non-empty corpus, the computation is done
 * only for the first document; calls for the other documents return
 * immediately.
 *
 * @throws ExecutionException if no ontology URL is set, if the ontology
 *           cannot be created, or if the output file cannot be opened or
 *           written.
 */
public void execute() throws ExecutionException {
  // The scores depend only on the ontology, so compute them once per corpus.
  if(corpus != null && corpus.size() != 0 && corpus.indexOf(document) > 0) {
    return;
  }
  // Validate the configuration before touching (and truncating) the output.
  if(ontologyURL == null || ontologyURL.toString().trim().length() == 0) {
    throw new ExecutionException("No ontology: neither using a loaded ontology nor giving the ontology URL!");
  }

  BufferedWriter bdmResultsWriter = null;
  try {
    // open the result file
    if(outputBDMFile != null && !outputBDMFile.toString().equals("")) {
      bdmResultsWriter = new BufferedWriter(new OutputStreamWriter(
        new FileOutputStream(new File(outputBDMFile.toURI())), "UTF-8"));
    }
    else {
      System.out.println("There is no file specified for storing the BDM scores!");
    }

    loadOntology();

    // write the header of the bdm score file
    if(bdmResultsWriter != null) {
      bdmResultsWriter.append("##The following are the BDM scores for ");
      bdmResultsWriter.append("each pair of concepts in the ontology named "
        + ontologyUsed.getName() + ".\n");
    }

    computeScores();

    // write the results into a file or console
    for(BDMOne oneb : bdmScores) {
      String text = oneb.printResult();
      if(bdmResultsWriter != null) {
        bdmResultsWriter.append(text + "\n");
      }
      else {
        System.out.println(text);
      }
    }
    if(bdmResultsWriter != null) {
      // Flush here so that a write failure is reported, not lost in close().
      bdmResultsWriter.flush();
    }
  }
  catch(URISyntaxException e) {
    throw new ExecutionException(e);
  }
  catch(IOException e) {
    throw new ExecutionException(e);
  }
  finally {
    if(bdmResultsWriter != null) {
      try {
        bdmResultsWriter.close();
      }
      catch(IOException e) {
        // The data was already flushed (or another exception is on its way),
        // so a failure to close is only reported.
        e.printStackTrace();
      }
    }
  }
}

/**
 * Creates the ontology resource from {@link #ontologyURL} and stores it in
 * {@link #ontologyUsed}.
 *
 * @throws ExecutionException if the ontology resource cannot be created.
 */
private void loadOntology() throws ExecutionException {
  FeatureMap fm = Factory.newFeatureMap();
  fm.put("rdfXmlURL", ontologyURL);
  try {
    ontologyUsed = (Ontology)Factory.createResource(
      "gate.creole.ontology.owlim.OWLIMOntologyLR", fm);
  }
  catch(ResourceInstantiationException e) {
    // Continuing would either dereference null or reuse a stale ontology.
    throw new ExecutionException(e);
  }
}

/** Returns the number of direct subclasses recorded for con, or 0 if none. */
private static float branchCount(HashMap<OClass,Integer> concept2branch,
        OClass con) {
  Integer branches = concept2branch.get(con);
  return branches == null ? 0.0f : branches.intValue();
}

/**
 * Computes the BDM information for every unordered pair of classes of
 * {@link #ontologyUsed} (including each class paired with itself) and adds
 * it to {@link #bdmScores}.
 */
private void computeScores() {
  // Start from a clean state so that running the resource again does not
  // append to the results and id mappings of the previous run.
  concept2id.clear();
  bdmScores.clear();

  // retrieving a list of top classes
  Set<OClass> topClasses = ontologyUsed.getOClasses(true);
  if(topClasses.size() > 1) {
    System.out.println("The ontology has "+topClasses.size() +" top classes!!");
  }
  // retrieving a list of all classes
  Set<OClass> allConcepts = ontologyUsed.getOClasses(false);

  // assign a number id to each class
  HashMap<Integer,OClass> id2concept = new HashMap<Integer,OClass>();
  int nextId = 1;
  for(OClass concept : allConcepts) {
    Integer id = Integer.valueOf(nextId);
    concept2id.put(concept, id);
    id2concept.put(id, concept);
    ++nextId;
  }
  System.out.println("ontology "+ontologyUsed.getName()+", allConcepts="+allConcepts.size());

  // for each concept, get the chain(s) of ids from it up to a top class;
  // chains are separated by separater2, ids within a chain by separater1
  HashMap<OClass,String> concept2chain = new HashMap<OClass,String>();
  for(OClass concept : allConcepts) {
    concept2chain.put(concept, obtainAChain(concept, ""));
  }

  // For every chain that starts at a leaf class, add its length (in edges)
  // to each concept on the chain and count the chains through each concept.
  HashMap<OClass,Float> con2ChainLen = new HashMap<OClass,Float>();
  HashMap<OClass,Integer> con2ChainNum = new HashMap<OClass,Integer>();
  float n0BDM = 0.0f;
  int numLeafChains = 0;
  for(OClass concept : allConcepts) {
    if(concept.getSubClasses(OClass.DIRECT_CLOSURE).size() != 0) {
      continue; // not a leaf
    }
    String[] chains =
      concept2chain.get(concept).split(ConstantParameters.separater2);
    for(int i = 0; i < chains.length; ++i) {
      String[] ids = chains[i].split(ConstantParameters.separater1);
      int len = ids.length - 1;
      n0BDM += len;
      ++numLeafChains;
      for(int j = 0; j < ids.length; ++j) {
        OClass con = id2concept.get(Integer.valueOf(ids[j]));
        Float lenSoFar = con2ChainLen.get(con);
        float total = len;
        if(lenSoFar != null) {
          total += lenSoFar.floatValue();
        }
        con2ChainLen.put(con, Float.valueOf(total));
        Integer chainsSoFar = con2ChainNum.get(con);
        int count = 1;
        if(chainsSoFar != null) {
          count += chainsSoFar.intValue();
        }
        con2ChainNum.put(con, Integer.valueOf(count));
      }
    }
  }
  // n0BDM becomes the average length of all leaf chains
  if(numLeafChains > 1) {
    n0BDM /= numLeafChains;
  }

  // turn the summed chain lengths into an average per concept
  // (replacing the value of an existing key is safe while iterating)
  for(OClass con : con2ChainLen.keySet()) {
    float averLen = con2ChainLen.get(con).floatValue();
    averLen /= con2ChainNum.get(con).intValue();
    con2ChainLen.put(con, Float.valueOf(averLen));
  }

  // compute the number of branches (direct subclasses) for each concept
  // and the average over the concepts that have any
  HashMap<OClass,Integer> concept2branch = new HashMap<OClass,Integer>();
  float averBran = 0.0f;
  int numBranching = 0;
  for(OClass concept : allConcepts) {
    int branches = concept.getSubClasses(OClass.DIRECT_CLOSURE).size();
    if(branches > 0) {
      concept2branch.put(concept, Integer.valueOf(branches));
      averBran += branches;
      ++numBranching;
    }
  }
  if(numBranching > 1) {
    averBran /= numBranching;
  }

  // Now compute the BDM for each pair of concepts
  for(OClass curCon11 : allConcepts) {
    String[] chainS11 =
      concept2chain.get(curCon11).split(ConstantParameters.separater2);
    int id11 = concept2id.get(curCon11).intValue();
    for(OClass curCon22 : allConcepts) {
      int id22 = concept2id.get(curCon22).intValue();
      if(id11 < id22) {
        continue; // each unordered pair is handled once
      }
      BDMOne bdmS = new BDMOne(curCon11, curCon22);
      if(id11 == id22) {
        // a concept compared with itself: score 1, using the shortest chain
        int len = Integer.MAX_VALUE;
        for(int i = 0; i < chainS11.length; ++i) {
          String[] items = chainS11[i].split(ConstantParameters.separater1);
          if(len > items.length) {
            len = items.length;
          }
        }
        len -= 1;
        bdmS.setValues(1.0f, len, 0, 0, n0BDM, 1, 1, 1);
        bdmS.setMsca(curCon11);
        bdmScores.add(bdmS);
        continue;
      }
      String[] chainS22 =
        concept2chain.get(curCon22).split(ConstantParameters.separater2);
      float m1 = con2ChainLen.get(curCon11).floatValue();
      float m2 = con2ChainLen.get(curCon22).floatValue();
      for(int iS11 = 0; iS11 < chainS11.length; ++iS11) {
        String[] chain11 = chainS11[iS11].split(ConstantParameters.separater1);
        int len11 = chain11.length;
        for(int iS22 = 0; iS22 < chainS22.length; ++iS22) {
          String[] chain22 =
            chainS22[iS22].split(ConstantParameters.separater1);
          int len22 = chain22.length;
          // determine the common part of the two chains, counted from the
          // top-class end
          int shorter = len11 > len22 ? len22 : len11;
          int cp = 0;
          while(cp < shorter
            && chain11[len11 - 1 - cp].equals(chain22[len22 - 1 - cp])) {
            ++cp;
          }
          if(cp == 0) {
            // the two concepts are not in the same connected part of the
            // ontology
            // NOTE(review): this overwrites any better score found for an
            // earlier pair of chains -- confirm this is intended.
            bdmS.setValues(0, -1, len11 - 1, len22 - 1, n0BDM, m1, m2, 1.0f);
          }
          else {
            // the most specific concept shared by both chains
            OClass commonCon =
              id2concept.get(Integer.valueOf(chain11[len11 - cp]));
            cp -= 1; // count the edges, not the nodes
            int dpk = len11 - 1 - cp;
            int dpr = len22 - 1 - cp;
            // compute the averaged branch in the chain from key to response
            int num11 = 1; // the common concept itself
            float bran = branchCount(concept2branch, commonCon);
            for(int i = 1; i < len11 - cp - 1; ++i) {
              OClass con = id2concept.get(Integer.valueOf(chain11[i]));
              bran += branchCount(concept2branch, con);
              ++num11;
            }
            for(int i = 1; i < len22 - cp - 1; ++i) {
              OClass con = id2concept.get(Integer.valueOf(chain22[i]));
              bran += branchCount(concept2branch, con);
              ++num11;
            }
            if(num11 > 1) {
              bran /= num11;
            }
            bran /= averBran;
            // compute the bdm score for the two chains
            float bdm = bran * cp / n0BDM;
            bdm = bdm / (bdm + dpk / m1 + dpr / m2);
            if(bdm > bdmS.bdmScore) {
              bdmS.setValues(bdm, cp, dpk, dpr, n0BDM, m1, m2, bran);
            }
            // NOTE(review): the msca is set for every chain pair, not only
            // for the best-scoring one -- confirm this is intended.
            bdmS.setMsca(commonCon);
          }
        }
      } // end of the loop for the chains of the two concepts
      bdmScores.add(bdmS);
    } // end of the loop for the second concept
  } // end of the loop for the first concept
}
/**
 * Recursively builds the id chain(s) leading from a concept up to the top
 * class(es).
 *
 * The id of curCon is appended to chainSofar. For a top class the chain is
 * closed with separater2 and returned. Otherwise separater1 is appended and
 * the method recurses into every direct superclass (looked up again in the
 * ontology by name), concatenating the chains that come back.
 *
 * @param curCon the concept to continue the chain from
 * @param chainSofar the ids collected so far, from the starting concept down
 *          to (but not including) curCon
 * @return all completed chains through curCon, each terminated by separater2;
 *         empty if curCon is not a top class and has no direct superclass
 */
String obtainAChain(OClass curCon, String chainSofar) {
  String withCurrent = chainSofar + concept2id.get(curCon).toString();
  if(curCon.isTopClass()) {
    // reached the top: this chain is complete
    return withCurrent + ConstantParameters.separater2;
  }
  Set<OClass> parents = curCon.getSuperClasses(OClass.DIRECT_CLOSURE);
  String prefix = withCurrent + ConstantParameters.separater1;
  StringBuilder chains = new StringBuilder();
  for(OClass parent : parents) {
    OClass resolved = (OClass) ontologyUsed.getOResourceByName(parent.getName());
    chains.append(obtainAChain(resolved, prefix));
  }
  return chains.toString();
}
/**
 * Sets the URL of the ontology to load.
 *
 * @param ontoU the ontology URL
 */
public void setOntologyURL(URL ontoU) {
  ontologyURL = ontoU;
}
/**
 * Returns the URL of the ontology to load.
 *
 * @return the ontology URL, possibly null
 */
public URL getOntologyURL() {
  return ontologyURL;
}
/**
 * Sets the URL of the file the BDM scores are written to.
 *
 * @param ontoU the output file URL
 */
public void setOutputBDMFile(URL ontoU) {
  outputBDMFile = ontoU;
}
/**
 * Returns the URL of the file the BDM scores are written to.
 *
 * @return the output file URL, possibly null
 */
public URL getOutputBDMFile() {
  return outputBDMFile;
}
//public void setOntology(Ontology onto) {
//this.ontologyUsed = onto;
//}
//public Ontology getOntology() {
// return this.ontologyUsed;
//}
}