1 | /*------------------------------------------------------------------------------ |
2 | * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team |
3 | * |
4 | * Distributable under the terms of either the Apache License (Version 2.0) or |
5 | * the GNU Lesser General Public License, as specified in the COPYING file. |
6 | ------------------------------------------------------------------------------*/ |
7 | #ifndef _lucene_search_SearchHeader_ |
8 | #define _lucene_search_SearchHeader_ |
9 | |
10 | #if defined(_LUCENE_PRAGMA_ONCE) |
11 | # pragma once |
12 | #endif |
13 | |
14 | #include "CLucene/index/IndexReader.h" |
15 | #include "CLucene/index/Term.h" |
16 | #include "Filter.h" |
17 | #include "CLucene/document/Document.h" |
18 | #include "Sort.h" |
19 | #include "CLucene/util/VoidList.h" |
20 | #include "Explanation.h" |
21 | #include "Similarity.h" |
22 | |
23 | CL_NS_DEF(search) |
24 | |
// Forward declarations of the search classes that are referred to by pointer
// in this header before (or without) their full definition being available.
// Searcher is listed because Hits stores and accepts a Searcher* ahead of the
// Searcher class definition further down in this file; declaring it here keeps
// the header from depending on another include to introduce the name.
class FieldDoc;
class Hits;
class Query;
class Scorer;
class Searcher;
class Sort;
class TopFieldDocs;
32 | |
33 | /** Expert: Returned by low-level search implementations. |
34 | * @see TopDocs */ |
35 | struct ScoreDoc { |
36 | /** Expert: A hit document's number. |
37 | * @see Searcher#doc(int32_t) |
38 | */ |
39 | int32_t doc; |
40 | |
41 | /** Expert: The score of this document for the query. */ |
42 | qreal score; |
43 | }; |
44 | |
45 | /** Expert: Returned by low-level search implementations. |
46 | * @see Searcher#search(Query,Filter,int32_t) */ |
47 | class TopDocs:LUCENE_BASE { |
48 | public: |
49 | /** Expert: The total number of hits for the query. |
50 | * @see Hits#length() |
51 | */ |
52 | int32_t totalHits; |
53 | |
54 | /** Expert: The top hits for the query. */ |
55 | ScoreDoc* scoreDocs; |
56 | int32_t scoreDocsLength; |
57 | |
58 | /** Expert: Constructs a TopDocs. TopDocs takes ownership of the ScoreDoc array*/ |
59 | TopDocs(const int32_t th, ScoreDoc* sds, int32_t scoreDocsLength); |
60 | ~TopDocs(); |
61 | }; |
62 | |
63 | // Lower-level search API. |
64 | // @see Searcher#search(Query,HitCollector) |
65 | class HitCollector: LUCENE_BASE { |
66 | public: |
67 | /** Called once for every non-zero scoring document, with the document number |
68 | * and its score. |
69 | * |
70 | * <P>If, for example, an application wished to collect all of the hits for a |
71 | * query in a BitSet, then it might:<pre> |
72 | * Searcher searcher = new IndexSearcher(indexReader); |
73 | * final BitSet bits = new BitSet(indexReader.maxDoc()); |
74 | * searcher.search(query, new HitCollector() { |
75 | * public void collect(int32_t doc, float score) { |
76 | * bits.set(doc); |
77 | * } |
78 | * }); |
79 | * </pre> |
80 | * |
81 | * <p>Note: This is called in an inner search loop. For good search |
82 | * performance, implementations of this method should not call |
83 | * {@link Searcher#doc(int32_t)} or |
84 | * {@link IndexReader#document(int32_t)} on every |
85 | * document number encountered. Doing so can slow searches by an order |
86 | * of magnitude or more. |
87 | * <p>Note: The <code>score</code> passed to this method is a raw score. |
88 | * In other words, the score will not necessarily be a float whose value is |
89 | * between 0 and 1. |
90 | */ |
91 | virtual void collect(const int32_t doc, const qreal score) = 0; |
92 | virtual ~HitCollector(){} |
93 | }; |
94 | |
95 | /** Expert: Calculate query weights and build query scorers. |
96 | * |
97 | * <p>A Weight is constructed by a query, given a Searcher ({@link |
98 | * Query#_createWeight(Searcher)}). The {@link #sumOfSquaredWeights()} method |
99 | * is then called on the top-level query to compute the query normalization |
100 | * factor (@link Similarity#queryNorm(qreal)}). This factor is then passed to |
101 | * {@link #normalize(qreal)}. At this point the weighting is complete and a |
102 | * scorer may be constructed by calling {@link #scorer(IndexReader)}. |
103 | */ |
104 | class Weight: LUCENE_BASE { |
105 | public: |
106 | virtual ~Weight(){ |
107 | }; |
108 | |
109 | /** The query that this concerns. */ |
110 | virtual Query* getQuery() = 0; |
111 | |
112 | /** The weight for this query. */ |
113 | virtual qreal getValue() = 0; |
114 | |
115 | /** The sum of squared weights of contained query clauses. */ |
116 | virtual qreal sumOfSquaredWeights() = 0; |
117 | |
118 | /** Assigns the query normalization factor to this. */ |
119 | virtual void normalize(qreal norm) = 0; |
120 | |
121 | /** Constructs a scorer for this. */ |
122 | virtual Scorer* scorer(CL_NS(index)::IndexReader* reader) = 0; |
123 | |
124 | /** An explanation of the score computation for the named document. */ |
125 | virtual void explain(CL_NS(index)::IndexReader* reader, int32_t doc, Explanation* ret) = 0; |
126 | |
127 | virtual TCHAR* toString(){ |
128 | return STRDUP_TtoT(_T("Weight" )); |
129 | } |
130 | }; |
131 | |
132 | class HitDoc:LUCENE_BASE { |
133 | public: |
134 | qreal score; |
135 | int32_t id; |
136 | CL_NS(document)::Document* doc; |
137 | |
138 | HitDoc* next; // in doubly-linked cache |
139 | HitDoc* prev; // in doubly-linked cache |
140 | |
141 | HitDoc(const qreal s, const int32_t i); |
142 | ~HitDoc(); |
143 | }; |
144 | |
145 | |
146 | |
147 | // A ranked list of documents, used to hold search results. |
148 | class Hits:LUCENE_BASE { |
149 | private: |
150 | Query* query; |
151 | Searcher* searcher; |
152 | Filter* filter; |
153 | const Sort* sort; |
154 | |
155 | size_t _length; // the total number of hits |
156 | CL_NS(util)::CLVector<HitDoc*, CL_NS(util)::Deletor::Object<HitDoc> > hitDocs; // cache of hits retrieved |
157 | |
158 | HitDoc* first; // head of LRU cache |
159 | HitDoc* last; // tail of LRU cache |
160 | int32_t numDocs; // number cached |
161 | int32_t maxDocs; // max to cache |
162 | |
163 | public: |
164 | Hits(Searcher* s, Query* q, Filter* f, const Sort* sort=NULL); |
165 | ~Hits(); |
166 | |
167 | /** Returns the total number of hits available in this set. */ |
168 | int32_t length() const; |
169 | |
170 | /** Returns the stored fields of the n<sup>th</sup> document in this set. |
171 | <p>Documents are cached, so that repeated requests for the same element may |
172 | return the same Document object. |
173 | * |
174 | * @memory Memory belongs to the hits object. Don't delete the return value. |
175 | */ |
176 | CL_NS(document)::Document& doc(const int32_t n); |
177 | |
178 | /** Returns the id for the nth document in this set. */ |
179 | int32_t id (const int32_t n); |
180 | |
181 | /** Returns the score for the nth document in this set. */ |
182 | qreal score(const int32_t n); |
183 | |
184 | private: |
185 | // Tries to add new documents to hitDocs. |
186 | // Ensures that the hit numbered <code>_min</code> has been retrieved. |
187 | void getMoreDocs(const size_t _min); |
188 | |
189 | HitDoc* getHitDoc(const size_t n); |
190 | |
191 | void addToFront(HitDoc* hitDoc); |
192 | |
193 | void remove(const HitDoc* hitDoc); |
194 | |
195 | }; |
196 | |
197 | /** The interface for search implementations. |
198 | * |
199 | * <p>Implementations provide search over a single index, over multiple |
200 | * indices, and over indices on remote servers. |
201 | */ |
202 | class Searchable: LUCENE_BASE { |
203 | public: |
204 | virtual ~Searchable(){ |
205 | } |
206 | |
207 | /** Lower-level search API. |
208 | * |
209 | * <p>{@link HitCollector#collect(int32_t,qreal)} is called for every non-zero |
210 | * scoring document. |
211 | * |
212 | * <p>Applications should only use this if they need <i>all</i> of the |
213 | * matching documents. The high-level search API ({@link |
214 | * Searcher#search(Query*)}) is usually more efficient, as it skips |
215 | * non-high-scoring hits. |
216 | * |
217 | * @param query to match documents |
218 | * @param filter if non-null, a bitset used to eliminate some documents |
219 | * @param results to receive hits |
220 | */ |
221 | virtual void _search(Query* query, Filter* filter, HitCollector* results) = 0; |
222 | |
223 | /** Frees resources associated with this Searcher. |
224 | * Be careful not to call this method while you are still using objects |
225 | * like {@link Hits}. |
226 | */ |
227 | virtual void close() = 0; |
228 | |
229 | /** Expert: Returns the number of documents containing <code>term</code>. |
230 | * Called by search code to compute term weights. |
231 | * @see IndexReader#docFreq(Term). |
232 | */ |
233 | virtual int32_t docFreq(const CL_NS(index)::Term* term) const = 0; |
234 | |
235 | /** Expert: Returns one greater than the largest possible document number. |
236 | * Called by search code to compute term weights. |
237 | * @see IndexReader#maxDoc(). |
238 | */ |
239 | virtual int32_t maxDoc() const = 0; |
240 | |
241 | /** Expert: Low-level search implementation. Finds the top <code>n</code> |
242 | * hits for <code>query</code>, applying <code>filter</code> if non-null. |
243 | * |
244 | * <p>Called by {@link Hits}. |
245 | * |
246 | * <p>Applications should usually call {@link Searcher#search(Query*)} or |
247 | * {@link Searcher#search(Query*,Filter*)} instead. |
248 | */ |
249 | virtual TopDocs* _search(Query* query, Filter* filter, const int32_t n) = 0; |
250 | |
251 | /** Expert: Returns the stored fields of document <code>i</code>. |
252 | * Called by {@link HitCollector} implementations. |
253 | * @see IndexReader#document(int32_t). |
254 | */ |
255 | virtual bool doc(int32_t i, CL_NS(document)::Document* d) = 0; |
256 | _CL_DEPRECATED( doc(i, document) ) CL_NS(document)::Document* doc(const int32_t i); |
257 | |
258 | /** Expert: called to re-write queries into primitive queries. */ |
259 | virtual Query* rewrite(Query* query) = 0; |
260 | |
261 | /** Returns an Explanation that describes how <code>doc</code> scored against |
262 | * <code>query</code>. |
263 | * |
264 | * <p>This is intended to be used in developing Similarity implementations, |
265 | * and, for good performance, should not be displayed with every hit. |
266 | * Computing an explanation is as expensive as executing the query over the |
267 | * entire index. |
268 | */ |
269 | virtual void explain(Query* query, int32_t doc, Explanation* ret) = 0; |
270 | |
271 | /** Expert: Low-level search implementation with arbitrary sorting. Finds |
272 | * the top <code>n</code> hits for <code>query</code>, applying |
273 | * <code>filter</code> if non-null, and sorting the hits by the criteria in |
274 | * <code>sort</code>. |
275 | * |
276 | * <p>Applications should usually call {@link |
277 | * Searcher#search(Query,Filter,Sort)} instead. |
278 | */ |
279 | virtual TopFieldDocs* _search(Query* query, Filter* filter, const int32_t n, const Sort* sort) = 0; |
280 | }; |
281 | |
282 | |
283 | |
284 | /** An abstract base class for search implementations. |
285 | * Implements some common utility methods. |
286 | */ |
287 | class Searcher:public Searchable { |
288 | private: |
289 | /** The Similarity implementation used by this searcher. */ |
290 | Similarity* similarity; |
291 | |
292 | public: |
293 | Searcher(){ |
294 | similarity = Similarity::getDefault(); |
295 | } |
296 | virtual ~Searcher(){ |
297 | } |
298 | |
299 | // Returns the documents matching <code>query</code>. |
300 | Hits* search(Query* query) { |
301 | return search(query, (Filter*)NULL ); |
302 | } |
303 | |
304 | // Returns the documents matching <code>query</code> and |
305 | // <code>filter</code>. |
306 | Hits* search(Query* query, Filter* filter) { |
307 | return _CLNEW Hits(this, query, filter); |
308 | } |
309 | |
310 | /** Returns documents matching <code>query</code> sorted by |
311 | * <code>sort</code>. |
312 | */ |
313 | Hits* search(Query* query, const Sort* sort){ |
314 | return _CLNEW Hits(this, query, NULL, sort); |
315 | } |
316 | |
317 | /** Returns documents matching <code>query</code> and <code>filter</code>, |
318 | * sorted by <code>sort</code>. |
319 | */ |
320 | Hits* search(Query* query, Filter* filter, const Sort* sort){ |
321 | return _CLNEW Hits(this, query, filter, sort); |
322 | } |
323 | |
324 | /** Lower-level search API. |
325 | * |
326 | * <p>{@link HitCollector#collect(int32_t ,qreal)} is called for every non-zero |
327 | * scoring document. |
328 | * |
329 | * <p>Applications should only use this if they need <i>all</i> of the |
330 | * matching documents. The high-level search API ({@link |
331 | * Searcher#search(Query*)}) is usually more efficient, as it skips |
332 | * non-high-scoring hits. |
333 | * <p>Note: The <code>score</code> passed to this method is a raw score. |
334 | * In other words, the score will not necessarily be a float whose value is |
335 | * between 0 and 1. |
336 | */ |
337 | void _search(Query* query, HitCollector* results) { |
338 | Searchable::_search(query, NULL, results); |
339 | } |
340 | |
341 | /** Expert: Set the Similarity implementation used by this Searcher. |
342 | * |
343 | * @see Similarity#setDefault(Similarity) |
344 | */ |
345 | void setSimilarity(Similarity* similarity) { |
346 | this->similarity = similarity; |
347 | } |
348 | |
349 | /** Expert: Return the Similarity implementation used by this Searcher. |
350 | * |
351 | * <p>This defaults to the current value of {@link Similarity#getDefault()}. |
352 | */ |
353 | Similarity* getSimilarity(){ |
354 | return this->similarity; |
355 | } |
356 | }; |
357 | |
358 | /** The abstract base class for queries. |
359 | <p>Instantiable subclasses are: |
360 | <ul> |
361 | <li> {@link TermQuery} |
362 | <li> {@link MultiTermQuery} |
363 | <li> {@link BooleanQuery} |
364 | <li> {@link WildcardQuery} |
365 | <li> {@link PhraseQuery} |
366 | <li> {@link PrefixQuery} |
367 | <li> {@link PhrasePrefixQuery} |
368 | <li> {@link FuzzyQuery} |
369 | <li> {@link RangeQuery} |
370 | <li> {@link spans.SpanQuery} |
371 | </ul> |
372 | <p>A parser for queries is contained in: |
373 | <ul> |
374 | <li>{@link queryParser.QueryParser QueryParser} |
375 | </ul> |
376 | */ |
377 | class Query :LUCENE_BASE { |
378 | private: |
379 | // query boost factor |
380 | qreal boost; |
381 | protected: |
382 | Query(const Query& clone); |
383 | public: |
384 | Query(); |
385 | virtual ~Query(); |
386 | |
387 | /** Sets the boost for this query clause to <code>b</code>. Documents |
388 | * matching this clause will (in addition to the normal weightings) have |
389 | * their score multiplied by <code>b</code>. |
390 | */ |
391 | void setBoost(qreal b); |
392 | |
393 | /** Gets the boost for this clause. Documents matching |
394 | * this clause will (in addition to the normal weightings) have their score |
395 | * multiplied by <code>b</code>. The boost is 1.0 by default. |
396 | */ |
397 | qreal getBoost() const; |
398 | |
399 | /** Expert: Constructs an initializes a Weight for a top-level query. */ |
400 | Weight* weight(Searcher* searcher); |
401 | |
402 | /** Expert: called to re-write queries into primitive queries. */ |
403 | virtual Query* rewrite(CL_NS(index)::IndexReader* reader); |
404 | |
405 | /** Expert: called when re-writing queries under MultiSearcher. |
406 | * |
407 | * <p>Only implemented by derived queries, with no |
408 | * {@link #_createWeight(Searcher)} implementatation. |
409 | */ |
410 | virtual Query* combine(Query** queries); |
411 | |
412 | /** Expert: merges the clauses of a set of BooleanQuery's into a single |
413 | * BooleanQuery. |
414 | * |
415 | *<p>A utility for use by {@link #combine(Query[])} implementations. |
416 | */ |
417 | static Query* mergeBooleanQueries(Query** queries); |
418 | |
419 | /** Expert: Returns the Similarity implementation to be used for this query. |
420 | * Subclasses may override this method to specify their own Similarity |
421 | * implementation, perhaps one that delegates through that of the Searcher. |
422 | * By default the Searcher's Similarity implementation is returned.*/ |
423 | Similarity* getSimilarity(Searcher* searcher); |
424 | |
425 | /** Returns a clone of this query. */ |
426 | virtual Query* clone() const = 0; |
427 | virtual const TCHAR* getQueryName() const = 0; |
428 | bool instanceOf(const TCHAR* other) const; |
429 | |
430 | /** Prints a query to a string, with <code>field</code> as the default field |
431 | * for terms. <p>The representation used is one that is readable by |
432 | * {@link queryParser.QueryParser QueryParser} |
433 | * (although, if the query was created by the parser, the printed |
434 | * representation may not be exactly what was parsed). |
435 | */ |
436 | virtual TCHAR* toString(const TCHAR* field) const = 0; |
437 | |
438 | virtual bool equals(Query* other) const = 0; |
439 | virtual size_t hashCode() const = 0; |
440 | |
441 | /** Prints a query to a string. */ |
442 | TCHAR* toString() const; |
443 | |
444 | |
445 | /** Expert: Constructs an appropriate Weight implementation for this query. |
446 | * |
447 | * <p>Only implemented by primitive queries, which re-write to themselves. |
448 | * <i>This is an Internal function</i> |
449 | */ |
450 | virtual Weight* _createWeight(Searcher* searcher); |
451 | |
452 | }; |
453 | |
454 | |
455 | CL_NS_END |
456 | #endif |
457 | |