1   /*
2    *  Copyright (c) 1998-2001, The University of Sheffield.
3    *
4    *  This file is part of GATE (see http://gate.ac.uk/), and is free
5    *  software, licenced under the GNU Library General Public License,
6    *  Version 2, June 1991 (in the distribution as file licence.html,
7    *  and also available at http://gate.ac.uk/gate/licence.html).
8    *
9    *  Valentin Tablan 17 June 2002
10   *
11   *  $Id: AnnotationLengthExtractor.java,v 1.1 2002/06/27 17:12:32 valyt Exp $
12   */
13  package gate.ml;
14  
15  import java.util.*;
16  
17  import weka.core.*;
18  
19  import gate.*;
20  import gate.util.*;
21  import gate.creole.ANNIEConstants;
22  
23  
24  public class AnnotationLengthExtractor extends AbstractAttributeExtractor {
25  
26    public AnnotationLengthExtractor() {
27    }
28  
29    public Attribute getAttribute() {
30      return new Attribute("Annotation length");
31    }
32  
33  
34    public Object getAttributeValue(Object data) {
35      //the data is an annotation in this case.
36      Annotation ann = (Annotation)data;
37      Long endOffset = ann.getEndNode().getOffset();
38      Long nextOffset = ann.getStartNode().getOffset();
39      int tokensCnt = 0;
40      while(nextOffset != null &&
41            nextOffset.compareTo(endOffset) < 0){
42        //advance offset counting all tokens found
43        Set startingAnnots = dataCollector.getStartingAnnotations(nextOffset);
44        if(startingAnnots != null && (!startingAnnots.isEmpty())){
45          Iterator annIter = startingAnnots.iterator();
46          while(annIter.hasNext()){
47            Annotation annotation = (Annotation)annIter.next();
48            if(annotation.getType().equals(ANNIEConstants.TOKEN_ANNOTATION_TYPE)){
49              tokensCnt++;
50            }
51          }
52        }
53        nextOffset = dataCollector.nextOffset(nextOffset);
54      }
55      return new Double(tokensCnt);
56    }
57  }