/** * A standalone application that makes use of GATE PR's as well as a user defined one. * The program displays the features of each document as created by the PR "Goldfish". * * @author Andrew Golightly (acg4@cs.waikato.ac.nz) * -- last updated 16/05/2003 */ package andrewgolightly.nlp.gate; import gate.Gate; import gate.Document; import gate.util.GateException; import gate.Factory; import gate.creole.SerialAnalyserController; import java.util.Iterator; import java.io.File; public class TotalGoldfishCount { private gate.Corpus corpus; public TotalGoldfishCount(String[] files) throws Exception { Gate.init(); Gate.getCreoleRegister().registerDirectories( new File(System.getProperty("user.dir")).toURL()); // add files to a corpus System.out.println("\n== OBTAINING DOCUMENTS =="); createCorpus(files); System.out.println("\n== USING GATE TO PROCESS THE DOCUMENTS =="); String[] processingResources = {"gate.creole.tokeniser.DefaultTokeniser", "gate.creole.splitter.SentenceSplitter", "andrewgolightly.nlp.gate.prs.Goldfish"}; runProcessingResources(processingResources); System.out.println("\n== DOCUMENT FEATURES =="); displayDocumentFeatures(); System.out.println("\nDemo done... :)"); } private void createCorpus(String[] files) throws GateException { corpus = Factory.newCorpus("Transient Gate Corpus"); for(int file = 0; file < files.length; file++) { System.out.print("\t " + (file + 1) + ") " + files[file]); try { corpus.add(Factory.newDocument(new File(files[file]).toURL())); System.out.println(" -- success"); } catch(gate.creole.ResourceInstantiationException e) { System.out.println(" -- failed (" + e.getMessage() + ")"); } catch(Exception e) { System.out.println(" -- " + e.getMessage()); } } } private void runProcessingResources(String[] processingResource) throws GateException { SerialAnalyserController pipeline = (SerialAnalyserController)Factory .createResource("gate.creole.SerialAnalyserController"); for(int pr = 0; pr < processingResource.length; pr++) { System.out.print("\t* Loading " + processingResource[pr] + " ... "); pipeline.add((gate.LanguageAnalyser)Factory .createResource(processingResource[pr])); System.out.println("done"); } System.out.print("Creating corpus from documents obtained..."); pipeline.setCorpus(corpus); System.out.println("done"); System.out.print("Running processing resources over corpus..."); pipeline.execute(); System.out.println("done"); } private void displayDocumentFeatures() { Iterator documentIterator = corpus.iterator(); while(documentIterator.hasNext()) { Document currDoc = (Document)documentIterator.next(); System.out.println("The features of document \"" + currDoc.getSourceUrl().getFile() + "\" are:"); gate.FeatureMap documentFeatures = currDoc.getFeatures(); Iterator featureIterator = documentFeatures.keySet().iterator(); while(featureIterator.hasNext()) { String key = (String)featureIterator.next(); System.out.println("\t*) " + key + " --> " + documentFeatures.get(key)); } System.out.println(); } } public static void main(String[] args) { if(args.length == 0) System.err.println("USAGE: java TotalGoldfishCount <file1> <file2> ..."); else try { new TotalGoldfishCount(args); } catch(Exception e) { e.printStackTrace(); } } }