1/*------------------------------------------------------------------------------
2* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
3*
4* Distributable under the terms of either the Apache License (Version 2.0) or
5* the GNU Lesser General Public License, as specified in the COPYING file.
6------------------------------------------------------------------------------*/
7#ifndef _lucene_search_Sort_
8#define _lucene_search_Sort_
9
10#if defined(_LUCENE_PRAGMA_ONCE)
11# pragma once
12#endif
13
14#include "CLucene/index/IndexReader.h"
15#include "SearchHeader.h"
16
17CL_NS_DEF(search)
18
19 class SortField; //predefine
20 class Sort;
21
22/**
23 * Expert: Compares two ScoreDoc objects for sorting.
24 *
25 */
26 class ScoreDocComparator:LUCENE_BASE {
27 protected:
28 ScoreDocComparator(){}
29 public:
30 virtual ~ScoreDocComparator();
31// CL_NS(util)::Comparable** cachedValues;
32// ScoreDocComparator(CL_NS(util)::Comparable** cachedValues);
33
34 /**
35 * Compares two ScoreDoc objects and returns a result indicating their
36 * sort order.
37 * @param i First ScoreDoc
38 * @param j Second ScoreDoc
39 * @return <code>-1</code> if <code>i</code> should come before <code>j</code><br><code>1</code> if <code>i</code> should come after <code>j</code><br><code>0</code> if they are equal
40 * @see java.util.Comparator
41 */
42 virtual int32_t compare (struct ScoreDoc* i, struct ScoreDoc* j) = 0;
43
44 /**
45 * Returns the value used to sort the given document. The
46 * object returned must implement the java.io.Serializable
47 * interface. This is used by multisearchers to determine how to collate results from their searchers.
48 * @see FieldDoc
49 * @param i Document
50 * @return Serializable object
51 */
52 virtual CL_NS(util)::Comparable* sortValue (struct ScoreDoc* i) = 0;
53
54
55 /**
56 * Returns the type of sort. Should return <code>SortField.SCORE</code>, <code>SortField.DOC</code>, <code>SortField.STRING</code>, <code>SortField.INTEGER</code>,
57 * <code>SortField::FLOAT</code> or <code>SortField.CUSTOM</code>. It is not valid to return <code>SortField.AUTO</code>.
58 * This is used by multisearchers to determine how to collate results from their searchers.
59 * @return One of the constants in SortField.
60 * @see SortField
61 */
62 virtual int32_t sortType() = 0;
63
64 /** Special comparator for sorting hits according to computed relevance (document score). */
65 static ScoreDocComparator* RELEVANCE;
66
67 /** Special comparator for sorting hits according to index order (document number). */
68 static ScoreDocComparator* INDEXORDER;
69 };
70
71/**
72 * Expert: returns a comparator for sorting ScoreDocs.
73 *
74 */
75class SortComparatorSource:LUCENE_BASE {
76public:
77 virtual ~SortComparatorSource(){
78 }
79
80 /**
81 * return a reference to a string describing the name of the comparator
82 * this is used in the explanation
83 */
84 virtual TCHAR* getName() = 0;
85
86 virtual size_t hashCode() = 0;
87
88 /**
89 * Creates a comparator for the field in the given index.
90 * @param reader Index to create comparator for.
91 * @param fieldname Field to create comparator for.
92 * @return Comparator of ScoreDoc objects.
93 * @throws IOException If an error occurs reading the index.
94 */
95 virtual ScoreDocComparator* newComparator (CL_NS(index)::IndexReader* reader, const TCHAR* fieldname) = 0;
96};
97
98
99/**
100 * Abstract base class for sorting hits returned by a Query.
101 *
102 * <p>This class should only be used if the other SortField
103 * types (SCORE, DOC, STRING, INT, FLOAT) do not provide an
104 * adequate sorting. It maintains an internal cache of values which
105 * could be quite large. The cache is an array of Comparable,
106 * one for each document in the index. There is a distinct
107 * Comparable for each unique term in the field - if
108 * some documents have the same term in the field, the cache
109 * array will have entries which reference the same Comparable.
110 *
111 */
112class SortComparator: public SortComparatorSource {
113public:
114 virtual ScoreDocComparator* newComparator (CL_NS(index)::IndexReader* reader, const TCHAR* fieldname);
115
116 SortComparator();
117 virtual ~SortComparator();
118
119 /**
120 * Returns an object which, when sorted according to natural order,
121 * will order the Term values in the correct order.
122 * <p>For example, if the Terms contained integer values, this method
123 * would return <code>new Integer(termtext)</code>. Note that this
124 * might not always be the most efficient implementation - for this
125 * particular example, a better implementation might be to make a
126 * ScoreDocLookupComparator that uses an internal lookup table of int.
127 * @param termtext The textual value of the term.
128 * @return An object representing <code>termtext</code> that sorts
129 * according to the natural order of <code>termtext</code>.
130 * @see Comparable
131 * @see ScoreDocComparator
132 */
133 virtual CL_NS(util)::Comparable* getComparable (const TCHAR* termtext) = 0;
134
135};
136
137
138/**
139 * Stores information about how to sort documents by terms in an individual
140 * field. Fields must be indexed in order to sort by them.
141 *
142 */
143class SortField:LUCENE_BASE {
144private:
145 const TCHAR* field;
146 int32_t type; // defaults to determining type dynamically
147 //Locale* locale; // defaults to "natural order" (no Locale)
148 bool reverse; // defaults to natural order
149 SortComparatorSource* factory;
150
151protected:
152 SortField (const SortField& clone);
153public:
154 virtual ~SortField();
155
156 /** Sort by document score (relevancy). Sort values are Float and higher
157 * values are at the front.
158 * PORTING: this is the same as SCORE in java, it had to be renamed because
159 * SCORE is a system macro on some platforms (AIX).
160 */
161 LUCENE_STATIC_CONSTANT(int32_t, DOCSCORE=0);
162
163 /** Sort by document number (index order). Sort values are Integer and lower
164 * values are at the front. */
165 LUCENE_STATIC_CONSTANT(int32_t, DOC=1);
166
167 /** Guess type of sort based on field contents. A regular expression is used
168 * to look at the first term indexed for the field and determine if it
169 * represents an integer number, a floating point number, or just arbitrary
170 * string characters. */
171 LUCENE_STATIC_CONSTANT(int32_t, AUTO=2);
172
173 /** Sort using term values as Strings. Sort values are String and lower
174 * values are at the front. */
175 LUCENE_STATIC_CONSTANT(int32_t, STRING=3);
176
177 /** Sort using term values as encoded Integers. Sort values are Integer and
178 * lower values are at the front. */
179 LUCENE_STATIC_CONSTANT(int32_t, INT=4);
180
181 /** Sort using term values as encoded Floats. Sort values are Float and
182 * lower values are at the front. */
183 LUCENE_STATIC_CONSTANT(int32_t, FLOAT=5);
184
185 /** Sort using a custom Comparator. Sort values are any Comparable and
186 * sorting is done according to natural order. */
187 LUCENE_STATIC_CONSTANT(int32_t, CUSTOM=9);
188
189 // IMPLEMENTATION NOTE: the FieldCache.STRING_INDEX is in the same "namespace"
190 // as the above static int values. Any new values must not have the same value
191 // as FieldCache.STRING_INDEX.
192
193 /** Represents sorting by document score (relevancy). */
194 static SortField* FIELD_SCORE;
195
196 /** Represents sorting by document number (index order). */
197 static SortField* FIELD_DOC;
198
199 SortField (const TCHAR* field);
200 //SortField (const TCHAR* field, bool reverse);
201 //todo: we cannot make reverse use default field of =false.
202 //because bool and int are the same type in c, overloading is not possible
203 SortField (const TCHAR* field, int32_t type, bool reverse);
204
205 /*
206 SortField (TCHAR* field, Locale* locale) {
207 SortField (TCHAR* field, Locale* locale, bool reverse);*/
208
209 SortField (const TCHAR* field, SortComparatorSource* comparator, bool reverse=false);
210
211 /** Returns the name of the field. Could return <code>null</code>
212 * if the sort is by SCORE or DOC.
213 * @return Name of field, possibly <code>null</code>.
214 */
215 const TCHAR* getField() const { return field; }
216
217 SortField* clone() const;
218
219 /** Returns the type of contents in the field.
220 * @return One of the constants SCORE, DOC, AUTO, STRING, INT or FLOAT.
221 */
222 int32_t getType() const { return type; }
223
224 /** Returns the Locale by which term values are interpreted.
225 * May return <code>null</code> if no Locale was specified.
226 * @return Locale, or <code>null</code>.
227 */
228 /*Locale getLocale() {
229 return locale;
230 }*/
231
232 /** Returns whether the sort should be reversed.
233 * @return True if natural order should be reversed.
234 */
235 bool getReverse() const { return reverse; }
236
237 SortComparatorSource* getFactory() const { return factory; }
238
239 TCHAR* toString() const;
240};
241
242
243
244/**
245 * Encapsulates sort criteria for returned hits.
246 *
247 * <p>The fields used to determine sort order must be carefully chosen.
248 * Documents must contain a single term in such a field,
249 * and the value of the term should indicate the document's relative position in
250 * a given sort order. The field must be indexed, but should not be tokenized,
251 * and does not need to be stored (unless you happen to want it back with the
252 * rest of your document data). In other words:
253 *
254 * <dl><dd><code>document.add (new Field ("byNumber", Integer.toString(x), false, true, false));</code>
255 * </dd></dl>
256 *
257 * <p><h3>Valid Types of Values</h3>
258 *
259 * <p>There are three possible kinds of term values which may be put into
260 * sorting fields: Integers, Floats, or Strings. Unless
261 * {@link SortField SortField} objects are specified, the type of value
262 * in the field is determined by parsing the first term in the field.
263 *
264 * <p>Integer term values should contain only digits and an optional
265 * preceeding negative sign. Values must be base 10 and in the range
266 * <code>Integer.MIN_VALUE</code> and <code>Integer.MAX_VALUE</code> inclusive.
267 * Documents which should appear first in the sort
268 * should have low value integers, later documents high values
269 * (i.e. the documents should be numbered <code>1..n</code> where
270 * <code>1</code> is the first and <code>n</code> the last).
271 *
272 * <p>Float term values should conform to values accepted by
273 * {@link Float Float.valueOf(String)} (except that <code>NaN</code>
274 * and <code>Infinity</code> are not supported).
275 * Documents which should appear first in the sort
276 * should have low values, later documents high values.
277 *
278 * <p>String term values can contain any valid String, but should
279 * not be tokenized. The values are sorted according to their
280 * {@link Comparable natural order}. Note that using this type
281 * of term value has higher memory requirements than the other
282 * two types.
283 *
284 * <p><h3>Object Reuse</h3>
285 *
286 * <p>One of these objects can be
287 * used multiple times and the sort order changed between usages.
288 *
289 * <p>This class is thread safe.
290 *
291 * <p><h3>Memory Usage</h3>
292 *
293 * <p>Sorting uses of caches of term values maintained by the
294 * internal HitQueue(s). The cache is static and contains an integer
295 * or float array of length <code>IndexReader.maxDoc()</code> for each field
296 * name for which a sort is performed. In other words, the size of the
297 * cache in bytes is:
298 *
299 * <p><code>4 * IndexReader.maxDoc() * (# of different fields actually used to sort)</code>
300 *
301 * <p>For String fields, the cache is larger: in addition to the
302 * above array, the value of every term in the field is kept in memory.
303 * If there are many unique terms in the field, this could
304 * be quite large.
305 *
306 * <p>Note that the size of the cache is not affected by how many
307 * fields are in the index and <i>might</i> be used to sort - only by
308 * the ones actually used to sort a result set.
309 *
310 * <p>The cache is cleared each time a new <code>IndexReader</code> is
311 * passed in, or if the value returned by <code>maxDoc()</code>
312 * changes for the current IndexReader. This class is not set up to
313 * be able to efficiently sort hits from more than one index
314 * simultaneously.
315 *
316 */
317class Sort:LUCENE_BASE {
318 // internal representation of the sort criteria
319 SortField** fields;
320 void clear();
321public:
322 ~Sort();
323
324 /** Represents sorting by computed relevance. Using this sort criteria
325 * returns the same results as calling {@link Searcher#search(Query) Searcher#search()}
326 * without a sort criteria, only with slightly more overhead. */
327 static Sort* RELEVANCE;
328
329 /** Represents sorting by index order. */
330 static Sort* INDEXORDER;
331
332 Sort();
333 Sort (const TCHAR* field, bool reverse=false);
334 Sort (const TCHAR** fields);
335 Sort (SortField* field);
336 Sort (SortField** fields);
337 void setSort (const TCHAR* field, bool reverse=false);
338 void setSort (const TCHAR** fieldnames);
339 void setSort (SortField* field);
340 void setSort (SortField** fields);
341
342 TCHAR* toString() const;
343
344 /**
345 * Representation of the sort criteria.
346 * @return a pointer to the of SortField array used in this sort criteria
347 */
348 SortField** getSort() const{ return fields; }
349};
350
351
352
353
354
355CL_NS_END
356#endif
357