Sort.h [qt4/src/3rdparty/clucene/src/CLucene/search/Sort.h]

1	/------------------------------------------------------------------------------*
2	* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
3	*
4	* Distributable under the terms of either the Apache License (Version 2.0) or
5	* the GNU Lesser General Public License, as specified in the COPYING file.
6	------------------------------------------------------------------------------/*
7	#ifndef _lucene_search_Sort_
8	#define _lucene_search_Sort_
9
10	#if defined(_LUCENE_PRAGMA_ONCE)
11	# pragma once
12	#endif
13
14	#include "CLucene/index/IndexReader.h"
15	#include "SearchHeader.h"
16
17	CL_NS_DEF(search)
18
19	class SortField; //predefine
20	class Sort;
21
22	/**
23	* Expert: Compares two ScoreDoc objects for sorting.
24	*
25	*/
26	class ScoreDocComparator:LUCENE_BASE {
27	protected:
28	ScoreDocComparator(){}
29	public:
30	virtual ~ScoreDocComparator();
31	// CL_NS(util)::Comparable* cachedValues;*
32	// ScoreDocComparator(CL_NS(util)::Comparable* cachedValues);*
33
34	/**
35	* Compares two ScoreDoc objects and returns a result indicating their
36	* sort order.
37	* @param i First ScoreDoc
38	* @param j Second ScoreDoc
39	* @return <code>-1</code> if <code>i</code> should come before <code>j</code><br><code>1</code> if <code>i</code> should come after <code>j</code><br><code>0</code> if they are equal
40	* @see java.util.Comparator
41	*/
42	virtual int32_t compare (struct ScoreDoc* i, struct ScoreDoc* j) = `0`;
43
44	/**
45	* Returns the value used to sort the given document. The
46	* object returned must implement the java.io.Serializable
47	* interface. This is used by multisearchers to determine how to collate results from their searchers.
48	* @see FieldDoc
49	* @param i Document
50	* @return Serializable object
51	*/
52	virtual CL_NS(util)::Comparable* sortValue (struct ScoreDoc* i) = `0`;
53
54
55	/**
56	* Returns the type of sort. Should return <code>SortField.SCORE</code>, <code>SortField.DOC</code>, <code>SortField.STRING</code>, <code>SortField.INTEGER</code>,
57	* <code>SortField::FLOAT</code> or <code>SortField.CUSTOM</code>. It is not valid to return <code>SortField.AUTO</code>.
58	* This is used by multisearchers to determine how to collate results from their searchers.
59	* @return One of the constants in SortField.
60	* @see SortField
61	*/
62	virtual int32_t sortType() = `0`;
63
64	/* Special comparator for sorting hits according to computed relevance (document score). /
65	static ScoreDocComparator* RELEVANCE;
66
67	/* Special comparator for sorting hits according to index order (document number). /
68	static ScoreDocComparator* INDEXORDER;
69	};
70
71	/**
72	* Expert: returns a comparator for sorting ScoreDocs.
73	*
74	*/
75	class SortComparatorSource:LUCENE_BASE {
76	public:
77	virtual ~SortComparatorSource(){
78	}
79
80	/**
81	* return a reference to a string describing the name of the comparator
82	* this is used in the explanation
83	*/
84	virtual TCHAR* getName() = `0`;
85
86	virtual size_t hashCode() = `0`;
87
88	/**
89	* Creates a comparator for the field in the given index.
90	* @param reader Index to create comparator for.
91	* @param fieldname Field to create comparator for.
92	* @return Comparator of ScoreDoc objects.
93	* @throws IOException If an error occurs reading the index.
94	*/
95	virtual ScoreDocComparator* newComparator (CL_NS(index)::IndexReader* reader, const TCHAR* fieldname) = `0`;
96	};
97
98
99	/**
100	* Abstract base class for sorting hits returned by a Query.
101	*
102	* <p>This class should only be used if the other SortField
103	* types (SCORE, DOC, STRING, INT, FLOAT) do not provide an
104	* adequate sorting. It maintains an internal cache of values which
105	* could be quite large. The cache is an array of Comparable,
106	* one for each document in the index. There is a distinct
107	* Comparable for each unique term in the field - if
108	* some documents have the same term in the field, the cache
109	* array will have entries which reference the same Comparable.
110	*
111	*/
112	class SortComparator: public SortComparatorSource {
113	public:
114	virtual ScoreDocComparator* newComparator (CL_NS(index)::IndexReader* reader, const TCHAR* fieldname);
115
116	SortComparator();
117	virtual ~SortComparator();
118
119	/**
120	* Returns an object which, when sorted according to natural order,
121	* will order the Term values in the correct order.
122	* <p>For example, if the Terms contained integer values, this method
123	* would return <code>new Integer(termtext)</code>. Note that this
124	* might not always be the most efficient implementation - for this
125	* particular example, a better implementation might be to make a
126	* ScoreDocLookupComparator that uses an internal lookup table of int.
127	* @param termtext The textual value of the term.
128	* @return An object representing <code>termtext</code> that sorts
129	* according to the natural order of <code>termtext</code>.
130	* @see Comparable
131	* @see ScoreDocComparator
132	*/
133	virtual CL_NS(util)::Comparable* getComparable (const TCHAR* termtext) = `0`;
134
135	};
136
137
138	/**
139	* Stores information about how to sort documents by terms in an individual
140	* field. Fields must be indexed in order to sort by them.
141	*
142	*/
143	class SortField:LUCENE_BASE {
144	private:
145	const TCHAR* field;
146	int32_t type; // defaults to determining type dynamically
147	//Locale locale; // defaults to "natural order" (no Locale)*
148	bool reverse; // defaults to natural order
149	SortComparatorSource* factory;
150
151	protected:
152	SortField (const SortField& clone);
153	public:
154	virtual ~SortField();
155
156	/* Sort by document score (relevancy). Sort values are Float and higher*
157	* values are at the front.
158	* PORTING: this is the same as SCORE in java, it had to be renamed because
159	* SCORE is a system macro on some platforms (AIX).
160	*/
161	LUCENE_STATIC_CONSTANT(int32_t, DOCSCORE=`0`);
162
163	/* Sort by document number (index order). Sort values are Integer and lower*
164	* values are at the front. */
165	LUCENE_STATIC_CONSTANT(int32_t, DOC=`1`);
166
167	/* Guess type of sort based on field contents. A regular expression is used*
168	* to look at the first term indexed for the field and determine if it
169	* represents an integer number, a floating point number, or just arbitrary
170	* string characters. */
171	LUCENE_STATIC_CONSTANT(int32_t, AUTO=`2`);
172
173	/* Sort using term values as Strings. Sort values are String and lower*
174	* values are at the front. */
175	LUCENE_STATIC_CONSTANT(int32_t, STRING=`3`);
176
177	/* Sort using term values as encoded Integers. Sort values are Integer and*
178	* lower values are at the front. */
179	LUCENE_STATIC_CONSTANT(int32_t, INT=`4`);
180
181	/* Sort using term values as encoded Floats. Sort values are Float and*
182	* lower values are at the front. */
183	LUCENE_STATIC_CONSTANT(int32_t, FLOAT=`5`);
184
185	/* Sort using a custom Comparator. Sort values are any Comparable and*
186	* sorting is done according to natural order. */
187	LUCENE_STATIC_CONSTANT(int32_t, CUSTOM=`9`);
188
189	// IMPLEMENTATION NOTE: the FieldCache.STRING_INDEX is in the same "namespace"
190	// as the above static int values. Any new values must not have the same value
191	// as FieldCache.STRING_INDEX.
192
193	/* Represents sorting by document score (relevancy). /
194	static SortField* FIELD_SCORE;
195
196	/* Represents sorting by document number (index order). /
197	static SortField* FIELD_DOC;
198
199	SortField (const TCHAR* field);
200	//SortField (const TCHAR field, bool reverse);*
201	//todo: we cannot make reverse use default field of =false.
202	//because bool and int are the same type in c, overloading is not possible
203	SortField (const TCHAR* field, int32_t type, bool reverse);
204
205	/*
206	SortField (TCHAR field, Locale* locale) {*
207	SortField (TCHAR field, Locale* locale, bool reverse);/
208
209	SortField (const TCHAR* field, SortComparatorSource* comparator, bool reverse=false);
210
211	/* Returns the name of the field. Could return <code>null</code>*
212	* if the sort is by SCORE or DOC.
213	* @return Name of field, possibly <code>null</code>.
214	*/
215	const TCHAR* getField() const { return field; }
216
217	SortField* clone() const;
218
219	/* Returns the type of contents in the field.*
220	* @return One of the constants SCORE, DOC, AUTO, STRING, INT or FLOAT.
221	*/
222	int32_t getType() const { return type; }
223
224	/* Returns the Locale by which term values are interpreted.*
225	* May return <code>null</code> if no Locale was specified.
226	* @return Locale, or <code>null</code>.
227	*/
228	/Locale getLocale() {*
229	return locale;
230	}/*
231
232	/* Returns whether the sort should be reversed.*
233	* @return True if natural order should be reversed.
234	*/
235	bool getReverse() const { return reverse; }
236
237	SortComparatorSource* getFactory() const { return factory; }
238
239	TCHAR* toString() const;
240	};
241
242
243
244	/**
245	* Encapsulates sort criteria for returned hits.
246	*
247	* <p>The fields used to determine sort order must be carefully chosen.
248	* Documents must contain a single term in such a field,
249	* and the value of the term should indicate the document's relative position in
250	* a given sort order. The field must be indexed, but should not be tokenized,
251	* and does not need to be stored (unless you happen to want it back with the
252	* rest of your document data). In other words:
253	*
254	* <dl><dd><code>document.add (new Field ("byNumber", Integer.toString(x), false, true, false));</code>
255	* </dd></dl>
256	*
257	* <p><h3>Valid Types of Values</h3>
258	*
259	* <p>There are three possible kinds of term values which may be put into
260	* sorting fields: Integers, Floats, or Strings. Unless
261	* {@link SortField SortField} objects are specified, the type of value
262	* in the field is determined by parsing the first term in the field.
263	*
264	* <p>Integer term values should contain only digits and an optional
265	* preceeding negative sign. Values must be base 10 and in the range
266	* <code>Integer.MIN_VALUE</code> and <code>Integer.MAX_VALUE</code> inclusive.
267	* Documents which should appear first in the sort
268	* should have low value integers, later documents high values
269	* (i.e. the documents should be numbered <code>1..n</code> where
270	* <code>1</code> is the first and <code>n</code> the last).
271	*
272	* <p>Float term values should conform to values accepted by
273	* {@link Float Float.valueOf(String)} (except that <code>NaN</code>
274	* and <code>Infinity</code> are not supported).
275	* Documents which should appear first in the sort
276	* should have low values, later documents high values.
277	*
278	* <p>String term values can contain any valid String, but should
279	* not be tokenized. The values are sorted according to their
280	* {@link Comparable natural order}. Note that using this type
281	* of term value has higher memory requirements than the other
282	* two types.
283	*
284	* <p><h3>Object Reuse</h3>
285	*
286	* <p>One of these objects can be
287	* used multiple times and the sort order changed between usages.
288	*
289	* <p>This class is thread safe.
290	*
291	* <p><h3>Memory Usage</h3>
292	*
293	* <p>Sorting uses of caches of term values maintained by the
294	* internal HitQueue(s). The cache is static and contains an integer
295	* or float array of length <code>IndexReader.maxDoc()</code> for each field
296	* name for which a sort is performed. In other words, the size of the
297	* cache in bytes is:
298	*
299	* <p><code>4 * IndexReader.maxDoc() * (# of different fields actually used to sort)</code>
300	*
301	* <p>For String fields, the cache is larger: in addition to the
302	* above array, the value of every term in the field is kept in memory.
303	* If there are many unique terms in the field, this could
304	* be quite large.
305	*
306	* <p>Note that the size of the cache is not affected by how many
307	* fields are in the index and <i>might</i> be used to sort - only by
308	* the ones actually used to sort a result set.
309	*
310	* <p>The cache is cleared each time a new <code>IndexReader</code> is
311	* passed in, or if the value returned by <code>maxDoc()</code>
312	* changes for the current IndexReader. This class is not set up to
313	* be able to efficiently sort hits from more than one index
314	* simultaneously.
315	*
316	*/
317	class Sort:LUCENE_BASE {
318	// internal representation of the sort criteria
319	SortField** fields;
320	void clear();
321	public:
322	~Sort();
323
324	/* Represents sorting by computed relevance. Using this sort criteria*
325	* returns the same results as calling {@link Searcher#search(Query) Searcher#search()}
326	* without a sort criteria, only with slightly more overhead. */
327	static Sort* RELEVANCE;
328
329	/* Represents sorting by index order. /
330	static Sort* INDEXORDER;
331
332	Sort();
333	Sort (const TCHAR* field, bool reverse=false);
334	Sort (const TCHAR** fields);
335	Sort (SortField* field);
336	Sort (SortField** fields);
337	void setSort (const TCHAR* field, bool reverse=false);
338	void setSort (const TCHAR** fieldnames);
339	void setSort (SortField* field);
340	void setSort (SortField** fields);
341
342	TCHAR* toString() const;
343
344	/**
345	* Representation of the sort criteria.
346	* @return a pointer to the of SortField array used in this sort criteria
347	*/
348	SortField** getSort() const{ return fields; }
349	};
350
351
352
353
354
355	CL_NS_END
356	#endif
357