1/*------------------------------------------------------------------------------
2* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
3*
4* Distributable under the terms of either the Apache License (Version 2.0) or
5* the GNU Lesser General Public License, as specified in the COPYING file.
6------------------------------------------------------------------------------*/
7#ifndef _lucene_search_Sort_
8#define _lucene_search_Sort_
9
10
11//#include "CLucene/index/IndexReader.h"
12//#include "SearchHeader.h"
13//#include "CLucene/util/Equator.h"
14CL_CLASS_DEF(index,IndexReader)
15CL_CLASS_DEF(util,Comparable)
16
17CL_NS_DEF(search)
18
19//predefine
20 class SortField;
21 class Sort;
22
23/**
24 * Expert: Compares two ScoreDoc objects for sorting.
25 *
26 */
27 class CLUCENE_EXPORT ScoreDocComparator {
28 protected:
29 ScoreDocComparator(){}
30 public:
31 virtual ~ScoreDocComparator();
32
33 /**
34 * Compares two ScoreDoc objects and returns a result indicating their
35 * sort order.
36 * @param i First ScoreDoc
37 * @param j Second ScoreDoc
38 * @return a negative integer if <code>i</code> should come before <code>j</code><br>
39 * a positive integer if <code>i</code> should come after <code>j</code><br>
40 * <code>0</code> if they are equal
41 * @see java.util.Comparator
42 */
43 virtual int32_t compare (struct ScoreDoc* i, struct ScoreDoc* j) = 0;
44
45 /**
46 * Returns the value used to sort the given document. The
47 * object returned must implement the java.io.Serializable
48 * interface. This is used by multisearchers to determine how
49 * to collate results from their searchers.
50 * @see FieldDoc
51 * @param i Document
52 * @return Serializable object
53 */
54 virtual CL_NS(util)::Comparable* sortValue (struct ScoreDoc* i) = 0;
55
56
57 /**
58 * Returns the type of sort. Should return <code>SortField.SCORE</code>,
59 * <code>SortField.DOC</code>, <code>SortField.STRING</code>,
60 * <code>SortField.INTEGER</code>, <code>SortField.FLOAT</code> or
61 * <code>SortField.CUSTOM</code>. It is not valid to return
62 * <code>SortField.AUTO</code>.
63 * This is used by multisearchers to determine how to collate results
64 * from their searchers.
65 * @return One of the constants in SortField.
66 * @see SortField
67 */
68 virtual int32_t sortType() = 0;
69
70 /** Special comparator for sorting hits according to computed relevance (document score). */
71 static ScoreDocComparator* RELEVANCE();
72
73 /** Special comparator for sorting hits according to index order (document number). */
74 static ScoreDocComparator* INDEXORDER();
75
76 /** Cleanup static data */
77 static CLUCENE_LOCAL void _shutdown();
78 };
79
80/**
81 * Expert: returns a comparator for sorting ScoreDocs.
82 *
83 */
84class CLUCENE_EXPORT SortComparatorSource:LUCENE_BASE {
85public:
86 virtual ~SortComparatorSource(){
87 }
88
89 /**
90 * return a reference to a string describing the name of the comparator
91 * this is used in the explanation
92 */
93 virtual TCHAR* getName() = 0;
94
95 virtual size_t hashCode() = 0;
96
97 /**
98 * Creates a comparator for the field in the given index.
99 * @param reader Index to create comparator for.
100 * @param fieldname Name of the field to create comparator for.
101 * @return Comparator of ScoreDoc objects.
102 * @throws IOException If an error occurs reading the index.
103 */
104 virtual ScoreDocComparator* newComparator (CL_NS(index)::IndexReader* reader, const TCHAR* fieldname) = 0;
105};
106
107
108/**
109 * Abstract base class for sorting hits returned by a Query.
110 *
111 * <p>This class should only be used if the other SortField
112 * types (SCORE, DOC, STRING, INT, FLOAT) do not provide an
113 * adequate sorting. It maintains an internal cache of values which
114 * could be quite large. The cache is an array of Comparable,
115 * one for each document in the index. There is a distinct
116 * Comparable for each unique term in the field - if
117 * some documents have the same term in the field, the cache
118 * array will have entries which reference the same Comparable.
119 *
120 */
121class CLUCENE_EXPORT SortComparator: public SortComparatorSource {
122public:
123 virtual ScoreDocComparator* newComparator (CL_NS(index)::IndexReader* reader, const TCHAR* fieldname);
124
125 SortComparator();
126 virtual ~SortComparator();
127
128 /**
129 * Returns an object which, when sorted according to natural order,
130 * will order the Term values in the correct order.
131 * <p>For example, if the Terms contained integer values, this method
132 * would return <code>new Integer(termtext)</code>. Note that this
133 * might not always be the most efficient implementation - for this
134 * particular example, a better implementation might be to make a
135 * ScoreDocLookupComparator that uses an internal lookup table of int.
136 * @param termtext The textual value of the term.
137 * @return An object representing <code>termtext</code> that sorts
138 * according to the natural order of <code>termtext</code>.
139 * @see Comparable
140 * @see ScoreDocComparator
141 */
142 virtual CL_NS(util)::Comparable* getComparable (const TCHAR* termtext) = 0;
143
144};
145
146
147/**
148 * Stores information about how to sort documents by terms in an individual
149 * field. Fields must be indexed in order to sort by them.
150 *
151 */
152class CLUCENE_EXPORT SortField:LUCENE_BASE {
153private:
154 const TCHAR* field;
155 int32_t type; // defaults to determining type dynamically
156 //Locale* locale; // defaults to "natural order" (no Locale)
157 bool reverse; // defaults to natural order
158 SortComparatorSource* factory;
159
160protected:
161 SortField (const SortField& clone);
162public:
163 virtual ~SortField();
164
165 /** Sort by document score (relevancy). Sort values are Float and higher
166 * values are at the front.
167 * PORTING: this is the same as SCORE in java, it had to be renamed because
168 * SCORE is a system macro on some platforms (AIX).
169 */
170 LUCENE_STATIC_CONSTANT(int32_t, DOCSCORE=0);
171
172 /** Sort by document number (index order). Sort values are Integer and lower
173 * values are at the front. */
174 LUCENE_STATIC_CONSTANT(int32_t, DOC=1);
175
176 /** Guess type of sort based on field contents. A regular expression is used
177 * to look at the first term indexed for the field and determine if it
178 * represents an integer number, a floating point number, or just arbitrary
179 * string characters. */
180 LUCENE_STATIC_CONSTANT(int32_t, AUTO=2);
181
182 /** Sort using term values as Strings. Sort values are String and lower
183 * values are at the front. */
184 LUCENE_STATIC_CONSTANT(int32_t, STRING=3);
185
186 /** Sort using term values as encoded Integers. Sort values are Integer and
187 * lower values are at the front. */
188 LUCENE_STATIC_CONSTANT(int32_t, INT=4);
189
190 /** Sort using term values as encoded Floats. Sort values are Float and
191 * lower values are at the front. */
192 LUCENE_STATIC_CONSTANT(int32_t, FLOAT=5);
193
194 /** Sort using term values as encoded Longs. Sort values are Long and
195 * lower values are at the front. */
196 LUCENE_STATIC_CONSTANT(int32_t, LONG=6);
197
198 /** Sort using term values as encoded Doubles. Sort values are Double and
199 * lower values are at the front. */
200 LUCENE_STATIC_CONSTANT(int32_t, DOUBLE=7);
201
202
203 /** Sort using a custom Comparator. Sort values are any Comparable and
204 * sorting is done according to natural order. */
205 LUCENE_STATIC_CONSTANT(int32_t, CUSTOM=9);
206
207 // IMPLEMENTATION NOTE: the FieldCache.STRING_INDEX is in the same "namespace"
208 // as the above static int values. Any new values must not have the same value
209 // as FieldCache.STRING_INDEX.
210
211 /** Represents sorting by document score (relevancy). */
212 static SortField* FIELD_SCORE();
213
214 /** Represents sorting by document number (index order). */
215 static SortField* FIELD_DOC();
216
217 /** Cleanup static data */
218 static CLUCENE_LOCAL void _shutdown();
219
220 /** Creates a sort by terms in the given field where the type of term value
221 * is determined dynamically ({@link #AUTO AUTO}).
222 * @param field Name of field to sort by, cannot be <code>null</code>.
223 */
224 SortField (const TCHAR* field);
225 //SortField (const TCHAR* field, bool reverse);
226 //todo: we cannot make reverse use default field of =false.
227 //because bool and int are the same type in c, overloading is not possible
228
229 /** Creates a sort, possibly in reverse, by terms in the given field with the
230 * type of term values explicitly given.
231 * @param field Name of field to sort by. Can be <code>null</code> if
232 * <code>type</code> is SCORE or DOC.
233 * @param type Type of values in the terms.
234 * @param reverse True if natural order should be reversed (default=false).
235 */
236 SortField (const TCHAR* field, int32_t type, bool reverse);
237
238 /*
239 SortField (TCHAR* field, Locale* locale) {
240 SortField (TCHAR* field, Locale* locale, bool reverse);*/
241
242 /** Creates a sort, possibly in reverse, with a custom comparison function.
243 * @param field Name of field to sort by; cannot be <code>null</code>.
244 * @param comparator Returns a comparator for sorting hits.
245 * @param reverse True if natural order should be reversed (default=false).
246 */
247 SortField (const TCHAR* field, SortComparatorSource* comparator, bool reverse=false);
248
249 /** Returns the name of the field. Could return <code>null</code>
250 * if the sort is by SCORE or DOC.
251 * @return Name of field, possibly <code>null</code>.
252 */
253 const TCHAR* getField() const;
254
255 SortField* clone() const;
256
257 /** Returns the type of contents in the field.
258 * @return One of the constants SCORE, DOC, AUTO, STRING, INT or FLOAT.
259 */
260 int32_t getType() const;
261
262 /** Returns the Locale by which term values are interpreted.
263 * May return <code>null</code> if no Locale was specified.
264 * @return Locale, or <code>null</code>.
265 */
266 /*Locale getLocale() {
267 return locale;
268 }*/
269
270 /** Returns whether the sort should be reversed.
271 * @return True if natural order should be reversed.
272 */
273 bool getReverse() const;
274
275 SortComparatorSource* getFactory() const;
276
277 TCHAR* toString() const;
278};
279
280
281
282/**
283 * Encapsulates sort criteria for returned hits.
284 *
285 * <p>The fields used to determine sort order must be carefully chosen.
286 * Documents must contain a single term in such a field,
287 * and the value of the term should indicate the document's relative position in
288 * a given sort order. The field must be indexed, but should not be tokenized,
289 * and does not need to be stored (unless you happen to want it back with the
290 * rest of your document data). In other words:
291 *
292 * <dl><dd><code>document.add (new Field ("byNumber", Integer.toString(x), false, true, false));</code>
293 * </dd></dl>
294 *
295 * <p><h3>Valid Types of Values</h3>
296 *
297 * <p>There are three possible kinds of term values which may be put into
298 * sorting fields: Integers, Floats, or Strings. Unless
299 * {@link SortField SortField} objects are specified, the type of value
300 * in the field is determined by parsing the first term in the field.
301 *
302 * <p>Integer term values should contain only digits and an optional
303 * preceeding negative sign. Values must be base 10 and in the range
304 * <code>Integer.MIN_VALUE</code> and <code>Integer.MAX_VALUE</code> inclusive.
305 * Documents which should appear first in the sort
306 * should have low value integers, later documents high values
307 * (i.e. the documents should be numbered <code>1..n</code> where
308 * <code>1</code> is the first and <code>n</code> the last).
309 *
310 * <p>Float term values should conform to values accepted by
311 * {@link Float Float.valueOf(String)} (except that <code>NaN</code>
312 * and <code>Infinity</code> are not supported).
313 * Documents which should appear first in the sort
314 * should have low values, later documents high values.
315 *
316 * <p>String term values can contain any valid String, but should
317 * not be tokenized. The values are sorted according to their
318 * {@link Comparable natural order}. Note that using this type
319 * of term value has higher memory requirements than the other
320 * two types.
321 *
322 * <p><h3>Object Reuse</h3>
323 *
324 * <p>One of these objects can be
325 * used multiple times and the sort order changed between usages.
326 *
327 * <p>This class is thread safe.
328 *
329 * <p><h3>Memory Usage</h3>
330 *
331 * <p>Sorting uses of caches of term values maintained by the
332 * internal HitQueue(s). The cache is static and contains an integer
333 * or float array of length <code>IndexReader.maxDoc()</code> for each field
334 * name for which a sort is performed. In other words, the size of the
335 * cache in bytes is:
336 *
337 * <p><code>4 * IndexReader.maxDoc() * (# of different fields actually used to sort)</code>
338 *
339 * <p>For String fields, the cache is larger: in addition to the
340 * above array, the value of every term in the field is kept in memory.
341 * If there are many unique terms in the field, this could
342 * be quite large.
343 *
344 * <p>Note that the size of the cache is not affected by how many
345 * fields are in the index and <i>might</i> be used to sort - only by
346 * the ones actually used to sort a result set.
347 *
348 * <p>The cache is cleared each time a new <code>IndexReader</code> is
349 * passed in, or if the value returned by <code>maxDoc()</code>
350 * changes for the current IndexReader. This class is not set up to
351 * be able to efficiently sort hits from more than one index
352 * simultaneously.
353 *
354 */
355class CLUCENE_EXPORT Sort:LUCENE_BASE {
356 // internal representation of the sort criteria
357 SortField** fields;
358 void clear();
359public:
360 ~Sort();
361
362 /** Represents sorting by computed relevance. Using this sort criteria
363 * returns the same results as calling {@link Searcher#search(Query) Searcher#search()}
364 * without a sort criteria, only with slightly more overhead. */
365 static Sort* RELEVANCE();
366
367 /** Represents sorting by index order. */
368 static Sort* INDEXORDER();
369
370 /** Cleanup static data */
371 static CLUCENE_LOCAL void _shutdown();
372
373
374 /** Sorts by computed relevance. This is the same sort criteria as
375 * calling {@link Searcher#search(Query) Searcher#search()} without a sort criteria, only with
376 * slightly more overhead. */
377 Sort();
378
379 /** Sorts possibly in reverse by the terms in <code>field</code> then by
380 * index order (document number). The type of value in <code>field</code> is determined
381 * automatically.
382 * @see SortField#AUTO
383 */
384 Sort (const TCHAR* field, bool reverse=false);
385
386 /** Sorts in succession by the terms in each field.
387 * The type of value in <code>field</code> is determined
388 * automatically.
389 * @see SortField#AUTO
390 */
391 Sort (const TCHAR** fields);
392
393 /** Sorts by the criteria in the given SortField. */
394 Sort (SortField* field);
395
396 /** Sorts in succession by the criteria in each SortField. */
397 Sort (SortField** fields);
398
399 /** Sets the sort to the terms in <code>field</code> possibly in reverse,
400 * then by index order (document number). */
401 void setSort (const TCHAR* field, bool reverse=false);
402
403 /** Sets the sort to the terms in each field in succession. */
404 void setSort (const TCHAR** fieldnames);
405
406 /** Sets the sort to the given criteria. */
407 void setSort (SortField* field);
408
409 /** Sets the sort to the given criteria in succession. */
410 void setSort (SortField** fields);
411
412 TCHAR* toString() const;
413
414 /**
415 * Representation of the sort criteria.
416 * @return a pointer to the of SortField array used in this sort criteria
417 */
418 SortField** getSort() const{ return fields; }
419};
420
421
422
423
424
425CL_NS_END
426#endif
427