1   /*
2    *  LuceneSearch.java
3    *
4    *  Copyright (c) 1998-2001, The University of Sheffield.
5    *
6    *  This file is part of GATE (see http://gate.ac.uk/), and is free
7    *  software, licenced under the GNU Library General Public License,
8    *  Version 2, June 1991 (in the distribution as file licence.html,
9    *  and also available at http://gate.ac.uk/gate/licence.html).
10   *
11   *  Rosen Marinov, 19/Apr/2002
12   *
13   */
14  
15  package gate.creole.ir.lucene;
16  
17  import gate.creole.ir.*;
18  import java.util.*;
19  
20  import org.apache.lucene.index.*;
21  import org.apache.lucene.analysis.*;
22  import org.apache.lucene.document.*;
23  import org.apache.lucene.search.*;
24  import org.apache.lucene.queryParser.*;
25  import org.apache.lucene.store.*;
26  
27  /** This class represents Lucene implementation of serching in index. */
28  public class LuceneSearch implements Search {
29  
30    /** An instance of indexed corpus*/
31    private IndexedCorpus indexedCorpus;
32  
33    /** Set the indexed corpus resource for searching. */
34    public void setCorpus(IndexedCorpus ic){
35      this.indexedCorpus = ic;
36    }
37  
38    /** Search in corpus with this query. Unlimited result length.*/
39    public QueryResultList search(String query)
40                                           throws IndexException, SearchException{
41      return search(query, -1);
42    }
43  
44    /** Search in corpus with this query.
45     *  Size of the result list is limited. */
46    public QueryResultList search(String query, int limit)
47                                           throws IndexException, SearchException{
48      return search(query, limit, null);
49    }
50  
51    /** Search in corpus with this query.
52     *  In each QueryResult will be added values of theise fields.
53     *  Result length is unlimited. */
54    public QueryResultList search(String query, List fieldNames)
55                                           throws IndexException, SearchException{
56      return search(query, -1, fieldNames);
57    }
58  
59    /** Search in corpus with this query.
60     *  In each QueryResult will be added values of theise fields.
61     *  Result length is limited. */
62    public QueryResultList search(String query, int limit, List fieldNames)
63                                           throws IndexException, SearchException{
64      Vector result = new Vector();
65  
66      try {
67        IndexSearcher searcher = new IndexSearcher(indexedCorpus.getIndexDefinition().getIndexLocation());
68        Query luceneQuery = QueryParser.parse(query, "body", new SimpleAnalyzer());
69  
70        Hits hits = searcher.search(luceneQuery);
71        int resultlength = hits.length();
72        if (limit>-1) {
73          resultlength = Math.min(limit,resultlength);
74        }
75  
76        Vector fieldValues = null;
77        for (int i=0; i<resultlength; i++) {
78  
79          if (fieldNames != null){
80            fieldValues = new Vector();
81            for (int j=0; j<fieldNames.size(); j++){
82              fieldValues.add(new gate.creole.ir.Term( fieldNames.get(j).toString(), hits.doc(i).get(fieldNames.get(j).toString())));
83            }
84          }
85  
86          result.add(new QueryResult(hits.doc(i).get(LuceneIndexManager.DOCUMENT_ID),hits.score(i),fieldValues));
87        }// for (all search hints)
88  
89        searcher.close();
90  
91        return new QueryResultList(query, indexedCorpus, result);
92      }
93      catch (java.io.IOException ioe) {
94        throw new IndexException(ioe.getMessage());
95      }
96      catch (org.apache.lucene.queryParser.ParseException pe) {
97        throw new SearchException(pe.getMessage());
98      }
99    }
100 }