gate.corpora
Class SerialCorpusImpl

java.lang.Object
  |
  +--gate.util.AbstractFeatureBearer
        |
        +--gate.creole.AbstractResource
              |
              +--gate.creole.AbstractLanguageResource
                    |
                    +--gate.corpora.SerialCorpusImpl
All Implemented Interfaces:
Collection, Corpus, CreoleListener, DatastoreListener, EventListener, FeatureBearer, gate.creole.ir.IndexedCorpus, LanguageResource, List, NameBearer, Resource, Serializable

public class SerialCorpusImpl
extends AbstractLanguageResource
implements Corpus, CreoleListener, DatastoreListener, gate.creole.ir.IndexedCorpus

See Also:
Serialized Form

Fields inherited from interface gate.Corpus
CORPUS_DOCLIST_PARAMETER_NAME, CORPUS_NAME_PARAMETER_NAME
 
Constructor Summary
SerialCorpusImpl()
           
 
Method Summary
 void add(int index, Object o)
           
 boolean add(Object o)
           
 boolean addAll(Collection c)
           
 boolean addAll(int index, Collection c)
           
 void addCorpusListener(CorpusListener l)
          Registers a new CorpusListener with this corpus.
 void cleanup()
          Every LR that is a CreoleListener (and other Listeners too) must override this method and make sure it removes itself from the objects which it has been listening to.
 void clear()
           
 boolean contains(Object o)
           
 boolean containsAll(Collection c)
           
 void datastoreClosed(CreoleEvent e)
          Called when a DataStore has been closed
 void datastoreCreated(CreoleEvent e)
          Called when a DataStore has been created
 void datastoreOpened(CreoleEvent e)
          Called when a DataStore has been opened
 boolean equals(Object o)
           
 int findDocument(Document doc)
           
 Object get(int index)
           
 String getDocumentName(int index)
          Gets the name of a document in this corpus.
 List getDocumentNames()
          Gets the names of the documents in this corpus.
 gate.creole.ir.IndexDefinition getIndexDefinition()
           
 gate.creole.ir.IndexManager getIndexManager()
           
 gate.creole.ir.IndexStatistics getIndexStatistics()
           
 Object getTransientSource()
           
 int hashCode()
           
 int indexOf(Object o)
           
 Resource init()
          Initialise this resource, and return it.
 boolean isDocumentLoaded(int index)
          This method returns true when the document is already loaded in memory
 boolean isEmpty()
           
 boolean isPersistentDocument(int index)
          This method returns true when the document is already stored on disk i.e., is not transient
 Iterator iterator()
           
 int lastIndexOf(Object o)
           
 ListIterator listIterator()
           
 ListIterator listIterator(int index)
           
 void populate(URL directory, FileFilter filter, String encoding, boolean recurseDirectories)
          Fills this corpus with documents created from files in a directory.
 Object remove(int index)
           
 boolean remove(Object o)
           
 boolean removeAll(Collection c)
           
 void removeCorpusListener(CorpusListener l)
          Removes one of the listeners registered with this corpus.
 void resourceAdopted(DatastoreEvent evt)
          Called by a datastore when a new resource has been adopted
 void resourceDeleted(DatastoreEvent evt)
          Called by a datastore when a resource has been deleted
 void resourceLoaded(CreoleEvent e)
          Called when a new Resource has been loaded into the system
 void resourceRenamed(Resource resource, String oldName, String newName)
          Called when the creole register has renamed a resource.
 void resourceUnloaded(CreoleEvent e)
          Called when a Resource has been removed from the system
 void resourceWritten(DatastoreEvent evt)
          Called by a datastore when a resource has been written to the datastore
 boolean retainAll(Collection c)
           
 Object set(int index, Object element)
           
 void setDataStore(DataStore dataStore)
          Set the data store that this LR lives in.
 void setDocumentPersistentID(int index, Object persID)
          This method should only be used by the Serial Datastore to set the persistent ID of the document at the given index.
 void setIndexDefinition(gate.creole.ir.IndexDefinition definition)
           
 void setTransientSource(Object source)
           
 int size()
           
 List subList(int fromIndex, int toIndex)
          A persistent Corpus does not support this method, as all the documents might not be in memory
 Object[] toArray()
           
 Object[] toArray(Object[] a)
           
 String toString()
           
 void unloadDocument(Document doc)
          Unloads a document from memory
 void unloadDocument(int index)
          Unloads the document from memory, but calls sync() first, to store the changes
 
Methods inherited from class gate.creole.AbstractLanguageResource
getDataStore, getLRPersistenceId, getParent, isModified, setLRPersistenceId, setParent, sync
 
Methods inherited from class gate.creole.AbstractResource
checkParameterValues, getName, getParameterValue, getParameterValue, removeResourceListeners, setName, setParameterValue, setParameterValue, setParameterValues, setParameterValues, setResourceListeners
 
Methods inherited from class gate.util.AbstractFeatureBearer
getFeatures, setFeatures
 
Methods inherited from class java.lang.Object
getClass, notify, notifyAll, wait, wait, wait
 
Methods inherited from interface gate.LanguageResource
getDataStore, getLRPersistenceId, getParent, isModified, setLRPersistenceId, setParent, sync
 
Methods inherited from interface gate.Resource
getParameterValue, setParameterValue, setParameterValues
 
Methods inherited from interface gate.util.FeatureBearer
getFeatures, setFeatures
 
Methods inherited from interface gate.util.NameBearer
getName, setName
 

Constructor Detail

SerialCorpusImpl

public SerialCorpusImpl()
Method Detail

getDocumentNames

public List getDocumentNames()
Gets the names of the documents in this corpus.
Specified by:
getDocumentNames in interface Corpus
Returns:
a List of Strings representing the names of the documents in this corpus.

setDocumentPersistentID

public void setDocumentPersistentID(int index,
                                    Object persID)
This method should only be used by the Serial Datastore to set the persistent ID of the document at the given index.

getDocumentName

public String getDocumentName(int index)
Gets the name of a document in this corpus.
Specified by:
getDocumentName in interface Corpus
Parameters:
index - the index of the document
Returns:
a String value representing the name of the document at index in this corpus.


unloadDocument

public void unloadDocument(int index)
Unloads the document from memory, but calls sync() first, to store the changes

unloadDocument

public void unloadDocument(Document doc)
Unloads a document from memory
Specified by:
unloadDocument in interface Corpus
Following copied from interface: gate.Corpus
Parameters:
doc - the document to be unloaded from memory.
Returns:
void.

isDocumentLoaded

public boolean isDocumentLoaded(int index)
This method returns true when the document is already loaded in memory
Specified by:
isDocumentLoaded in interface Corpus

isPersistentDocument

public boolean isPersistentDocument(int index)
This method returns true when the document is already stored on disk i.e., is not transient

cleanup

public void cleanup()
Every LR that is a CreoleListener (and other Listeners too) must override this method and make sure it removes itself from the objects which it has been listening to. Otherwise, the object will not be released from memory (memory leak!).
Specified by:
cleanup in interface Resource
Overrides:
cleanup in class AbstractLanguageResource

populate

public void populate(URL directory,
                     FileFilter filter,
                     String encoding,
                     boolean recurseDirectories)
              throws IOException,
                     ResourceInstantiationException
Fills this corpus with documents created from files in a directory.
Specified by:
populate in interface Corpus
Parameters:
filter - the file filter used to select files from the target directory. If the filter is null all the files will be accepted.
directory - the directory from which the files will be picked. This parameter is a URL for uniformity. It needs to be a URL of type file, otherwise an InvalidArgumentException will be thrown. An implementation for this method is provided as a static method at gate.corpora.CorpusImpl#populate(Corpus,URL,FileFilter,boolean).
encoding - the encoding to be used for reading the documents
recurseDirectories - whether the directory should be parsed recursively. If true, all the files from the provided directory and all its child directories (on as many levels as necessary) will be picked if accepted by the filter; otherwise the child directories will be ignored.

removeCorpusListener

public void removeCorpusListener(CorpusListener l)
Description copied from interface: Corpus
Removes one of the listeners registered with this corpus.
Specified by:
removeCorpusListener in interface Corpus
Following copied from interface: gate.Corpus
Parameters:
l - the listener to be removed.

addCorpusListener

public void addCorpusListener(CorpusListener l)
Description copied from interface: Corpus
Registers a new CorpusListener with this corpus.
Specified by:
addCorpusListener in interface Corpus
Following copied from interface: gate.Corpus
Parameters:
l - the listener to be added.

resourceLoaded

public void resourceLoaded(CreoleEvent e)
Description copied from interface: CreoleListener
Called when a new Resource has been loaded into the system
Specified by:
resourceLoaded in interface CreoleListener

resourceRenamed

public void resourceRenamed(Resource resource,
                            String oldName,
                            String newName)
Description copied from interface: CreoleListener
Called when the creole register has renamed a resource.
Specified by:
resourceRenamed in interface CreoleListener

resourceUnloaded

public void resourceUnloaded(CreoleEvent e)
Description copied from interface: CreoleListener
Called when a Resource has been removed from the system
Specified by:
resourceUnloaded in interface CreoleListener

datastoreOpened

public void datastoreOpened(CreoleEvent e)
Description copied from interface: CreoleListener
Called when a DataStore has been opened
Specified by:
datastoreOpened in interface CreoleListener

datastoreCreated

public void datastoreCreated(CreoleEvent e)
Description copied from interface: CreoleListener
Called when a DataStore has been created
Specified by:
datastoreCreated in interface CreoleListener

datastoreClosed

public void datastoreClosed(CreoleEvent e)
Description copied from interface: CreoleListener
Called when a DataStore has been closed
Specified by:
datastoreClosed in interface CreoleListener

resourceAdopted

public void resourceAdopted(DatastoreEvent evt)
Called by a datastore when a new resource has been adopted
Specified by:
resourceAdopted in interface DatastoreListener

resourceDeleted

public void resourceDeleted(DatastoreEvent evt)
Called by a datastore when a resource has been deleted
Specified by:
resourceDeleted in interface DatastoreListener

resourceWritten

public void resourceWritten(DatastoreEvent evt)
Called by a datastore when a resource has been written to the datastore
Specified by:
resourceWritten in interface DatastoreListener

size

public int size()
Specified by:
size in interface List

isEmpty

public boolean isEmpty()
Specified by:
isEmpty in interface List

contains

public boolean contains(Object o)
Specified by:
contains in interface List

iterator

public Iterator iterator()
Specified by:
iterator in interface List

toString

public String toString()
Overrides:
toString in class Object

toArray

public Object[] toArray()
Specified by:
toArray in interface List

toArray

public Object[] toArray(Object[] a)
Specified by:
toArray in interface List

add

public boolean add(Object o)
Specified by:
add in interface List

remove

public boolean remove(Object o)
Specified by:
remove in interface List

findDocument

public int findDocument(Document doc)

containsAll

public boolean containsAll(Collection c)
Specified by:
containsAll in interface List

addAll

public boolean addAll(Collection c)
Specified by:
addAll in interface List

addAll

public boolean addAll(int index,
                      Collection c)
Specified by:
addAll in interface List

removeAll

public boolean removeAll(Collection c)
Specified by:
removeAll in interface List

retainAll

public boolean retainAll(Collection c)
Specified by:
retainAll in interface List

clear

public void clear()
Specified by:
clear in interface List

equals

public boolean equals(Object o)
Specified by:
equals in interface List
Overrides:
equals in class Object

hashCode

public int hashCode()
Specified by:
hashCode in interface List
Overrides:
hashCode in class Object

get

public Object get(int index)
Specified by:
get in interface List

set

public Object set(int index,
                  Object element)
Specified by:
set in interface List

add

public void add(int index,
                Object o)
Specified by:
add in interface List

remove

public Object remove(int index)
Specified by:
remove in interface List

indexOf

public int indexOf(Object o)
Specified by:
indexOf in interface List

lastIndexOf

public int lastIndexOf(Object o)
Specified by:
lastIndexOf in interface List

listIterator

public ListIterator listIterator()
Specified by:
listIterator in interface List

listIterator

public ListIterator listIterator(int index)
Specified by:
listIterator in interface List

subList

public List subList(int fromIndex,
                    int toIndex)
A persistent Corpus does not support this method, as all the documents might not be in memory
Specified by:
subList in interface List

setDataStore

public void setDataStore(DataStore dataStore)
                  throws PersistenceException
Description copied from interface: LanguageResource
Set the data store that this LR lives in.
Specified by:
setDataStore in interface LanguageResource
Overrides:
setDataStore in class AbstractLanguageResource

setTransientSource

public void setTransientSource(Object source)

getTransientSource

public Object getTransientSource()

init

public Resource init()
              throws ResourceInstantiationException
Description copied from interface: Resource
Initialise this resource, and return it.
Specified by:
init in interface Resource
Overrides:
init in class AbstractResource

setIndexDefinition

public void setIndexDefinition(gate.creole.ir.IndexDefinition definition)
Specified by:
setIndexDefinition in interface gate.creole.ir.IndexedCorpus

getIndexDefinition

public gate.creole.ir.IndexDefinition getIndexDefinition()
Specified by:
getIndexDefinition in interface gate.creole.ir.IndexedCorpus

getIndexManager

public gate.creole.ir.IndexManager getIndexManager()
Specified by:
getIndexManager in interface gate.creole.ir.IndexedCorpus

getIndexStatistics

public gate.creole.ir.IndexStatistics getIndexStatistics()
Specified by:
getIndexStatistics in interface gate.creole.ir.IndexedCorpus