gate.creole.tokeniser
Class DefaultTokeniser

java.lang.Object
  |
  +--gate.util.AbstractFeatureBearer
        |
        +--gate.creole.AbstractResource
              |
              +--gate.creole.AbstractProcessingResource
                    |
                    +--gate.creole.AbstractLanguageAnalyser
                          |
                          +--gate.creole.tokeniser.DefaultTokeniser
All Implemented Interfaces:
ANNIEConstants, Executable, FeatureBearer, LanguageAnalyser, NameBearer, ProcessingResource, Resource, Serializable

public class DefaultTokeniser
extends AbstractLanguageAnalyser

A composed tokeniser containing a SimpleTokeniser and a Transducer. The simple tokeniser tokenises the document and the transducer processes its output.

See Also:
Serialized Form

Field Summary
static String DEF_TOK_ANNOT_SET_PARAMETER_NAME
           
static String DEF_TOK_DOCUMENT_PARAMETER_NAME
           
static String DEF_TOK_ENCODING_PARAMETER_NAME
           
static String DEF_TOK_GRAMRULES_URL_PARAMETER_NAME
           
static String DEF_TOK_TOKRULES_URL_PARAMETER_NAME
           
 
Fields inherited from interface gate.creole.ANNIEConstants
ANNOTATION_COREF_FEATURE_NAME, DATE_ANNOTATION_TYPE, DOCUMENT_COREF_FEATURE_NAME, LOCATION_ANNOTATION_TYPE, LOOKUP_ANNOTATION_TYPE, LOOKUP_MAJOR_TYPE_FEATURE_NAME, LOOKUP_MINOR_TYPE_FEATURE_NAME, MONEY_ANNOTATION_TYPE, ORGANIZATION_ANNOTATION_TYPE, PERSON_ANNOTATION_TYPE, PERSON_GENDER_FEATURE_NAME, PR_NAMES, SENTENCE_ANNOTATION_TYPE, SPACE_TOKEN_ANNOTATION_TYPE, TOKEN_ANNOTATION_TYPE, TOKEN_CATEGORY_FEATURE_NAME, TOKEN_KIND_FEATURE_NAME, TOKEN_LENGTH_FEATURE_NAME, TOKEN_ORTH_FEATURE_NAME, TOKEN_STRING_FEATURE_NAME
 
Constructor Summary
DefaultTokeniser()
           
 
Method Summary
 void execute()
          Starts the execution of this executable.
 String getAnnotationSetName()
           
 String getEncoding()
           
 URL getTokeniserRulesURL()
           
 URL getTransducerGrammarURL()
           
 Resource init()
          Initialise this resource, and return it.
 void interrupt()
          Notifies the PRs contained in this composed tokeniser (the SimpleTokeniser and the Transducer) that they should stop their execution as soon as possible.
 void setAnnotationSetName(String annotationSetName)
           
 void setEncoding(String encoding)
           
 void setTokeniserRulesURL(URL tokeniserRulesURL)
           
 void setTransducerGrammarURL(URL transducerGrammarURL)
           
 
Methods inherited from class gate.creole.AbstractLanguageAnalyser
getCorpus, getDocument, setCorpus, setDocument
 
Methods inherited from class gate.creole.AbstractProcessingResource
addProgressListener, addStatusListener, cleanup, isInterrupted, reInit, removeProgressListener, removeStatusListener
 
Methods inherited from class gate.creole.AbstractResource
checkParameterValues, getName, getParameterValue, getParameterValue, removeResourceListeners, setName, setParameterValue, setParameterValue, setParameterValues, setParameterValues, setResourceListeners
 
Methods inherited from class gate.util.AbstractFeatureBearer
getFeatures, setFeatures
 
Methods inherited from class java.lang.Object
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 
Methods inherited from interface gate.ProcessingResource
isInterrupted, reInit
 
Methods inherited from interface gate.Resource
cleanup, getParameterValue, setParameterValue, setParameterValues
 
Methods inherited from interface gate.util.FeatureBearer
getFeatures, setFeatures
 
Methods inherited from interface gate.util.NameBearer
getName, setName
 

Field Detail

DEF_TOK_DOCUMENT_PARAMETER_NAME

public static final String DEF_TOK_DOCUMENT_PARAMETER_NAME

DEF_TOK_ANNOT_SET_PARAMETER_NAME

public static final String DEF_TOK_ANNOT_SET_PARAMETER_NAME

DEF_TOK_TOKRULES_URL_PARAMETER_NAME

public static final String DEF_TOK_TOKRULES_URL_PARAMETER_NAME

DEF_TOK_GRAMRULES_URL_PARAMETER_NAME

public static final String DEF_TOK_GRAMRULES_URL_PARAMETER_NAME

DEF_TOK_ENCODING_PARAMETER_NAME

public static final String DEF_TOK_ENCODING_PARAMETER_NAME
Constructor Detail

DefaultTokeniser

public DefaultTokeniser()
Method Detail

init

public Resource init()
              throws ResourceInstantiationException
Initialise this resource, and return it.
Overrides:
init in class AbstractProcessingResource

execute

public void execute()
             throws ExecutionException
Description copied from interface: Executable
Starts the execution of this executable.
Overrides:
execute in class AbstractProcessingResource

interrupt

public void interrupt()
Notifies the PRs contained in this composed tokeniser (the SimpleTokeniser and the Transducer) that they should stop their execution as soon as possible.
Overrides:
interrupt in class AbstractProcessingResource

setTokeniserRulesURL

public void setTokeniserRulesURL(URL tokeniserRulesURL)

getTokeniserRulesURL

public URL getTokeniserRulesURL()

setEncoding

public void setEncoding(String encoding)

getEncoding

public String getEncoding()

setTransducerGrammarURL

public void setTransducerGrammarURL(URL transducerGrammarURL)

getTransducerGrammarURL

public URL getTransducerGrammarURL()

setAnnotationSetName

public void setAnnotationSetName(String annotationSetName)

getAnnotationSetName

public String getAnnotationSetName()