1/*------------------------------------------------------------------------------
2* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
3*
4* Distributable under the terms of either the Apache License (Version 2.0) or
5* the GNU Lesser General Public License, as specified in the COPYING file.
6------------------------------------------------------------------------------*/
7#ifndef _lucene_search_SearchHeader_
8#define _lucene_search_SearchHeader_
9
10#if defined(_LUCENE_PRAGMA_ONCE)
11# pragma once
12#endif
13
14#include "CLucene/index/IndexReader.h"
15#include "CLucene/index/Term.h"
16#include "Filter.h"
17#include "CLucene/document/Document.h"
18#include "Sort.h"
19#include "CLucene/util/VoidList.h"
20#include "Explanation.h"
21#include "Similarity.h"
22
23CL_NS_DEF(search)
24
// Forward declarations for types this header names before (or without)
// seeing their full definition.
class Scorer;
class Query;
class Hits;
class Sort;
class FieldDoc;
class TopFieldDocs;
// Hits keeps a Searcher* and takes one in its constructor, but Searcher is
// only defined further down in this header. Declare it here so the header
// does not depend on another include happening to declare it first.
class Searcher;
32
33 /** Expert: Returned by low-level search implementations.
34 * @see TopDocs */
35 struct ScoreDoc {
36 /** Expert: A hit document's number.
37 * @see Searcher#doc(int32_t)
38 */
39 int32_t doc;
40
41 /** Expert: The score of this document for the query. */
42 qreal score;
43 };
44
45 /** Expert: Returned by low-level search implementations.
46 * @see Searcher#search(Query,Filter,int32_t) */
47 class TopDocs:LUCENE_BASE {
48 public:
49 /** Expert: The total number of hits for the query.
50 * @see Hits#length()
51 */
52 int32_t totalHits;
53
54 /** Expert: The top hits for the query. */
55 ScoreDoc* scoreDocs;
56 int32_t scoreDocsLength;
57
58 /** Expert: Constructs a TopDocs. TopDocs takes ownership of the ScoreDoc array*/
59 TopDocs(const int32_t th, ScoreDoc* sds, int32_t scoreDocsLength);
60 ~TopDocs();
61 };
62
63 // Lower-level search API.
64 // @see Searcher#search(Query,HitCollector)
65 class HitCollector: LUCENE_BASE {
66 public:
67 /** Called once for every non-zero scoring document, with the document number
68 * and its score.
69 *
70 * <P>If, for example, an application wished to collect all of the hits for a
71 * query in a BitSet, then it might:<pre>
72 * Searcher searcher = new IndexSearcher(indexReader);
73 * final BitSet bits = new BitSet(indexReader.maxDoc());
74 * searcher.search(query, new HitCollector() {
75 * public void collect(int32_t doc, float score) {
76 * bits.set(doc);
77 * }
78 * });
79 * </pre>
80 *
81 * <p>Note: This is called in an inner search loop. For good search
82 * performance, implementations of this method should not call
83 * {@link Searcher#doc(int32_t)} or
84 * {@link IndexReader#document(int32_t)} on every
85 * document number encountered. Doing so can slow searches by an order
86 * of magnitude or more.
87 * <p>Note: The <code>score</code> passed to this method is a raw score.
88 * In other words, the score will not necessarily be a float whose value is
89 * between 0 and 1.
90 */
91 virtual void collect(const int32_t doc, const qreal score) = 0;
92 virtual ~HitCollector(){}
93 };
94
95 /** Expert: Calculate query weights and build query scorers.
96 *
97 * <p>A Weight is constructed by a query, given a Searcher ({@link
98 * Query#_createWeight(Searcher)}). The {@link #sumOfSquaredWeights()} method
99 * is then called on the top-level query to compute the query normalization
100 * factor (@link Similarity#queryNorm(qreal)}). This factor is then passed to
101 * {@link #normalize(qreal)}. At this point the weighting is complete and a
102 * scorer may be constructed by calling {@link #scorer(IndexReader)}.
103 */
104 class Weight: LUCENE_BASE {
105 public:
106 virtual ~Weight(){
107 };
108
109 /** The query that this concerns. */
110 virtual Query* getQuery() = 0;
111
112 /** The weight for this query. */
113 virtual qreal getValue() = 0;
114
115 /** The sum of squared weights of contained query clauses. */
116 virtual qreal sumOfSquaredWeights() = 0;
117
118 /** Assigns the query normalization factor to this. */
119 virtual void normalize(qreal norm) = 0;
120
121 /** Constructs a scorer for this. */
122 virtual Scorer* scorer(CL_NS(index)::IndexReader* reader) = 0;
123
124 /** An explanation of the score computation for the named document. */
125 virtual void explain(CL_NS(index)::IndexReader* reader, int32_t doc, Explanation* ret) = 0;
126
127 virtual TCHAR* toString(){
128 return STRDUP_TtoT(_T("Weight"));
129 }
130 };
131
132 class HitDoc:LUCENE_BASE {
133 public:
134 qreal score;
135 int32_t id;
136 CL_NS(document)::Document* doc;
137
138 HitDoc* next; // in doubly-linked cache
139 HitDoc* prev; // in doubly-linked cache
140
141 HitDoc(const qreal s, const int32_t i);
142 ~HitDoc();
143 };
144
145
146
147 // A ranked list of documents, used to hold search results.
148 class Hits:LUCENE_BASE {
149 private:
150 Query* query;
151 Searcher* searcher;
152 Filter* filter;
153 const Sort* sort;
154
155 size_t _length; // the total number of hits
156 CL_NS(util)::CLVector<HitDoc*, CL_NS(util)::Deletor::Object<HitDoc> > hitDocs; // cache of hits retrieved
157
158 HitDoc* first; // head of LRU cache
159 HitDoc* last; // tail of LRU cache
160 int32_t numDocs; // number cached
161 int32_t maxDocs; // max to cache
162
163 public:
164 Hits(Searcher* s, Query* q, Filter* f, const Sort* sort=NULL);
165 ~Hits();
166
167 /** Returns the total number of hits available in this set. */
168 int32_t length() const;
169
170 /** Returns the stored fields of the n<sup>th</sup> document in this set.
171 <p>Documents are cached, so that repeated requests for the same element may
172 return the same Document object.
173 *
174 * @memory Memory belongs to the hits object. Don't delete the return value.
175 */
176 CL_NS(document)::Document& doc(const int32_t n);
177
178 /** Returns the id for the nth document in this set. */
179 int32_t id (const int32_t n);
180
181 /** Returns the score for the nth document in this set. */
182 qreal score(const int32_t n);
183
184 private:
185 // Tries to add new documents to hitDocs.
186 // Ensures that the hit numbered <code>_min</code> has been retrieved.
187 void getMoreDocs(const size_t _min);
188
189 HitDoc* getHitDoc(const size_t n);
190
191 void addToFront(HitDoc* hitDoc);
192
193 void remove(const HitDoc* hitDoc);
194
195 };
196
197 /** The interface for search implementations.
198 *
199 * <p>Implementations provide search over a single index, over multiple
200 * indices, and over indices on remote servers.
201 */
202 class Searchable: LUCENE_BASE {
203 public:
204 virtual ~Searchable(){
205 }
206
207 /** Lower-level search API.
208 *
209 * <p>{@link HitCollector#collect(int32_t,qreal)} is called for every non-zero
210 * scoring document.
211 *
212 * <p>Applications should only use this if they need <i>all</i> of the
213 * matching documents. The high-level search API ({@link
214 * Searcher#search(Query*)}) is usually more efficient, as it skips
215 * non-high-scoring hits.
216 *
217 * @param query to match documents
218 * @param filter if non-null, a bitset used to eliminate some documents
219 * @param results to receive hits
220 */
221 virtual void _search(Query* query, Filter* filter, HitCollector* results) = 0;
222
223 /** Frees resources associated with this Searcher.
224 * Be careful not to call this method while you are still using objects
225 * like {@link Hits}.
226 */
227 virtual void close() = 0;
228
229 /** Expert: Returns the number of documents containing <code>term</code>.
230 * Called by search code to compute term weights.
231 * @see IndexReader#docFreq(Term).
232 */
233 virtual int32_t docFreq(const CL_NS(index)::Term* term) const = 0;
234
235 /** Expert: Returns one greater than the largest possible document number.
236 * Called by search code to compute term weights.
237 * @see IndexReader#maxDoc().
238 */
239 virtual int32_t maxDoc() const = 0;
240
241 /** Expert: Low-level search implementation. Finds the top <code>n</code>
242 * hits for <code>query</code>, applying <code>filter</code> if non-null.
243 *
244 * <p>Called by {@link Hits}.
245 *
246 * <p>Applications should usually call {@link Searcher#search(Query*)} or
247 * {@link Searcher#search(Query*,Filter*)} instead.
248 */
249 virtual TopDocs* _search(Query* query, Filter* filter, const int32_t n) = 0;
250
251 /** Expert: Returns the stored fields of document <code>i</code>.
252 * Called by {@link HitCollector} implementations.
253 * @see IndexReader#document(int32_t).
254 */
255 virtual bool doc(int32_t i, CL_NS(document)::Document* d) = 0;
256 _CL_DEPRECATED( doc(i, document) ) CL_NS(document)::Document* doc(const int32_t i);
257
258 /** Expert: called to re-write queries into primitive queries. */
259 virtual Query* rewrite(Query* query) = 0;
260
261 /** Returns an Explanation that describes how <code>doc</code> scored against
262 * <code>query</code>.
263 *
264 * <p>This is intended to be used in developing Similarity implementations,
265 * and, for good performance, should not be displayed with every hit.
266 * Computing an explanation is as expensive as executing the query over the
267 * entire index.
268 */
269 virtual void explain(Query* query, int32_t doc, Explanation* ret) = 0;
270
271 /** Expert: Low-level search implementation with arbitrary sorting. Finds
272 * the top <code>n</code> hits for <code>query</code>, applying
273 * <code>filter</code> if non-null, and sorting the hits by the criteria in
274 * <code>sort</code>.
275 *
276 * <p>Applications should usually call {@link
277 * Searcher#search(Query,Filter,Sort)} instead.
278 */
279 virtual TopFieldDocs* _search(Query* query, Filter* filter, const int32_t n, const Sort* sort) = 0;
280 };
281
282
283
284 /** An abstract base class for search implementations.
285 * Implements some common utility methods.
286 */
287 class Searcher:public Searchable {
288 private:
289 /** The Similarity implementation used by this searcher. */
290 Similarity* similarity;
291
292 public:
293 Searcher(){
294 similarity = Similarity::getDefault();
295 }
296 virtual ~Searcher(){
297 }
298
299 // Returns the documents matching <code>query</code>.
300 Hits* search(Query* query) {
301 return search(query, (Filter*)NULL );
302 }
303
304 // Returns the documents matching <code>query</code> and
305 // <code>filter</code>.
306 Hits* search(Query* query, Filter* filter) {
307 return _CLNEW Hits(this, query, filter);
308 }
309
310 /** Returns documents matching <code>query</code> sorted by
311 * <code>sort</code>.
312 */
313 Hits* search(Query* query, const Sort* sort){
314 return _CLNEW Hits(this, query, NULL, sort);
315 }
316
317 /** Returns documents matching <code>query</code> and <code>filter</code>,
318 * sorted by <code>sort</code>.
319 */
320 Hits* search(Query* query, Filter* filter, const Sort* sort){
321 return _CLNEW Hits(this, query, filter, sort);
322 }
323
324 /** Lower-level search API.
325 *
326 * <p>{@link HitCollector#collect(int32_t ,qreal)} is called for every non-zero
327 * scoring document.
328 *
329 * <p>Applications should only use this if they need <i>all</i> of the
330 * matching documents. The high-level search API ({@link
331 * Searcher#search(Query*)}) is usually more efficient, as it skips
332 * non-high-scoring hits.
333 * <p>Note: The <code>score</code> passed to this method is a raw score.
334 * In other words, the score will not necessarily be a float whose value is
335 * between 0 and 1.
336 */
337 void _search(Query* query, HitCollector* results) {
338 Searchable::_search(query, NULL, results);
339 }
340
341 /** Expert: Set the Similarity implementation used by this Searcher.
342 *
343 * @see Similarity#setDefault(Similarity)
344 */
345 void setSimilarity(Similarity* similarity) {
346 this->similarity = similarity;
347 }
348
349 /** Expert: Return the Similarity implementation used by this Searcher.
350 *
351 * <p>This defaults to the current value of {@link Similarity#getDefault()}.
352 */
353 Similarity* getSimilarity(){
354 return this->similarity;
355 }
356 };
357
358 /** The abstract base class for queries.
359 <p>Instantiable subclasses are:
360 <ul>
361 <li> {@link TermQuery}
362 <li> {@link MultiTermQuery}
363 <li> {@link BooleanQuery}
364 <li> {@link WildcardQuery}
365 <li> {@link PhraseQuery}
366 <li> {@link PrefixQuery}
367 <li> {@link PhrasePrefixQuery}
368 <li> {@link FuzzyQuery}
369 <li> {@link RangeQuery}
370 <li> {@link spans.SpanQuery}
371 </ul>
372 <p>A parser for queries is contained in:
373 <ul>
374 <li>{@link queryParser.QueryParser QueryParser}
375 </ul>
376 */
377 class Query :LUCENE_BASE {
378 private:
379 // query boost factor
380 qreal boost;
381 protected:
382 Query(const Query& clone);
383 public:
384 Query();
385 virtual ~Query();
386
387 /** Sets the boost for this query clause to <code>b</code>. Documents
388 * matching this clause will (in addition to the normal weightings) have
389 * their score multiplied by <code>b</code>.
390 */
391 void setBoost(qreal b);
392
393 /** Gets the boost for this clause. Documents matching
394 * this clause will (in addition to the normal weightings) have their score
395 * multiplied by <code>b</code>. The boost is 1.0 by default.
396 */
397 qreal getBoost() const;
398
399 /** Expert: Constructs an initializes a Weight for a top-level query. */
400 Weight* weight(Searcher* searcher);
401
402 /** Expert: called to re-write queries into primitive queries. */
403 virtual Query* rewrite(CL_NS(index)::IndexReader* reader);
404
405 /** Expert: called when re-writing queries under MultiSearcher.
406 *
407 * <p>Only implemented by derived queries, with no
408 * {@link #_createWeight(Searcher)} implementatation.
409 */
410 virtual Query* combine(Query** queries);
411
412 /** Expert: merges the clauses of a set of BooleanQuery's into a single
413 * BooleanQuery.
414 *
415 *<p>A utility for use by {@link #combine(Query[])} implementations.
416 */
417 static Query* mergeBooleanQueries(Query** queries);
418
419 /** Expert: Returns the Similarity implementation to be used for this query.
420 * Subclasses may override this method to specify their own Similarity
421 * implementation, perhaps one that delegates through that of the Searcher.
422 * By default the Searcher's Similarity implementation is returned.*/
423 Similarity* getSimilarity(Searcher* searcher);
424
425 /** Returns a clone of this query. */
426 virtual Query* clone() const = 0;
427 virtual const TCHAR* getQueryName() const = 0;
428 bool instanceOf(const TCHAR* other) const;
429
430 /** Prints a query to a string, with <code>field</code> as the default field
431 * for terms. <p>The representation used is one that is readable by
432 * {@link queryParser.QueryParser QueryParser}
433 * (although, if the query was created by the parser, the printed
434 * representation may not be exactly what was parsed).
435 */
436 virtual TCHAR* toString(const TCHAR* field) const = 0;
437
438 virtual bool equals(Query* other) const = 0;
439 virtual size_t hashCode() const = 0;
440
441 /** Prints a query to a string. */
442 TCHAR* toString() const;
443
444
445 /** Expert: Constructs an appropriate Weight implementation for this query.
446 *
447 * <p>Only implemented by primitive queries, which re-write to themselves.
448 * <i>This is an Internal function</i>
449 */
450 virtual Weight* _createWeight(Searcher* searcher);
451
452 };
453
454
455CL_NS_END
456#endif
457