CLConfig.h [qt4/src/3rdparty/clucene/src/CLucene/CLConfig.h]

1	/*------------------------------------------------------------------------------
2	* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
3	*
4	* Distributable under the terms of either the Apache License (Version 2.0) or
5	* the GNU Lesser General Public License, as specified in the COPYING file.
6	------------------------------------------------------------------------------*/
7	#ifndef _lucene_Config_ // include guard; closed by the final #endif of this header
8	#define _lucene_Config_
9
10
11	////////////////////////////////////////////////////////////////////
12	// these settings should be set up in the compiler,
13	// but are put here for reference as to what could be defined
14	////////////////////////////////////////////////////////////////////
15	//
16	//define this if you want debugging code to be enabled
17	//#define _DEBUG
18	//
19	//condition debugging: _CL__CND_DEBUG is defined automatically whenever _DEBUG is defined (it can also be defined on its own)
20	#if defined(_DEBUG) && !defined(_CL__CND_DEBUG)
21	#define _CL__CND_DEBUG
22	#endif
23	//
24	//define this to print out lots of information about merges, etc
25	//requires _CL__CND_DEBUG to be defined
26	//#define _CL_DEBUG_INFO stdout
27	//
28	//to disable namespaces define this
29	//#define DISABLE_NAMESPACE
30	//
31	//This is mostly for windows. If you have put the google sparse
32	//map code in your include path somewhere, then define this
33	//to use it.
34	//However, for msvc, there are no significant gains since there
35	//is already a compatible hashmap available.
36	//#define _CL_HAVE_GOOGLE_DENSE_HASH_MAP
37	//
38	////////////////////////////////////////////////////////////////////
39
40	////////////////////////////////////////////////////////////////////
41	// These options can be set depending on the particular needs of
42	// Your application
43	////////////////////////////////////////////////////////////////////
44	//
45	//define this to force the build into ascii mode
46	//#define _ASCII
47	//
48	//define this to force the build into ucs2 mode
49	//#define _UCS2
50	//
//If a wide character is being converted to an ASCII character and it
//cannot fit, this character is used instead. Required.
//The expansion keeps only the low 8 bits of c. The argument is parenthesized
//so that an expression argument (e.g. x >> 16) is evaluated in full before
//the cast is applied.
#define LUCENE_OOR_CHAR(c) ((char)(((unsigned short)(c))&0xFF))
54	//
55	//define this to force clucene to use its internal
56	//character functions.
57	//Tests may display unpredictable behaviour if this is not defined.
58	#define LUCENE_USE_INTERNAL_CHAR_FUNCTIONS
59	//
60	//define this to enable mmap support in the fsdirectory IndexInput
61	//todo: only available for windows so far...need to add MMapInput.cpp to project
62	//EXPERIMENTAL
63	//#define LUCENE_FS_MMAP
64	//
65	//LOCK_DIR implementation:
66	//define this to set an exact directory for the lock dir (not recommended);
67	//all other methods of getting the temporary directory will be ignored
68	//#define LUCENE_LOCK_DIR "/tmp"
69	//
70	//define this to try to load the lock dir from the specified environment variable
71	#define LUCENE_LOCK_DIR_ENV_1 "TEMP"
72	//define this to look up a second environment variable if the first one fails
73	#define LUCENE_LOCK_DIR_ENV_2 "TMP"
74	//define this if you want to have a fallback directory; if not defined then
75	//the lock directory will be the index directory
76	#define LUCENE_LOCK_DIR_ENV_FALLBACK "/tmp"
77	//
78	////////////////////////////////////////////////////////////////////
79
80
81
82	////////////////////////////////////////////////////////////////////
83	// The following are search query options
84	// The NO_* options can make CLucene faster and/or smaller;
85	// special queries sometimes require longer search times or may
86	// not be required
87	////////////////////////////////////////////////////////////////////
88	//
89	//Define this to remove fuzzy query and sloppy scoring
90	//#define NO_FUZZY_QUERY
91	//
92	//Define to remove wildcard queries - t*m or te?m to match term
93	//#define NO_WILDCARD_QUERY
94	//
95	//Define to remove prefix term query - ter* to match term or terms
96	//#define NO_PREFIX_QUERY
97	//
98	//Define to remove range queries (exclusive and inclusive)
99	//#define NO_RANGE_QUERY
100	//
101	//These must always be defined. They can be adjusted if required, but
102	//generally the wildcard string would be '*' and the wildcard char would be '?'.
103	//Both are Required.
104	#define LUCENE_WILDCARDTERMENUM_WILDCARD_STRING '*'
105	#define LUCENE_WILDCARDTERMENUM_WILDCARD_CHAR '?'
106	//
107	////////////////////////////////////////////////////////////////////
108
109	////////////////////////////////////////////////////////////////////
110	// memory handling configurations
111	////////////////////////////////////////////////////////////////////
112	//
113	//If this is defined, lucene's configurations are changed
114	//to use less memory, but may run slower.
115	//todo: I don't think this actually changes speed much, just memory
116	#define LUCENE_OPTIMIZE_FOR_MEMORY // while defined, LUCENE_TOKEN_WORD_LENGTH and LUCENE_TERM_TEXT_LENGTH below are left undefined
117	//
118	//pointer tracking (LUCENE_ENABLE_MEMLEAKTRACKING) is enabled automatically
119	//in _DEBUG builds unless LUCENE_DISABLE_MEMTRACKING is defined.
120	//This is a useful tool for memory leak tracking, but the LuceneBase can slow down the code a lot
121	#if defined(_DEBUG)
122	#if !defined(LUCENE_DISABLE_MEMTRACKING) && !defined(LUCENE_ENABLE_MEMLEAKTRACKING)
123	#define LUCENE_ENABLE_MEMLEAKTRACKING
124	#endif
125	#endif
126	//
127	//enable use of rich file/line tracking. use CL_FILELINE to pass
128	//to functions like stringDuplicate (or use CL_STRDUP* functions instead) and
129	//CLStringIntern::x. Defined automatically whenever LUCENE_ENABLE_MEMLEAKTRACKING is defined.
130	#if defined(LUCENE_ENABLE_MEMLEAKTRACKING)
131	#define LUCENE_ENABLE_FILELINEINFO
132	#endif
133	//
134	//enables creation of a clucene.log file that logs every
135	//call to the new operator. Requires LUCENE_ENABLE_MEMLEAKTRACKING and _DEBUG.
136	//Each entry is written in this format:
137	//action,file name,file line,allocation size
138	//logging can be disabled by setting _lucene_disable_debuglogging to true (the define below is commented out, so this is off by default)
139	#if defined(LUCENE_ENABLE_MEMLEAKTRACKING) && defined(_DEBUG)
140	//#define LUCENE_ENABLE_CONSTRUCTOR_LOG
141	#endif
142	//
143	//
144	//define this to enable reference counting. This is
145	//not necessary or useful in most cases except when implementing wrappers
146	//which have reference counting. If the wrapper wraps a StringReader,
147	//for example, it should expect that the wrapped StringReader should not
148	//be deleted. However, when the StringReader is added into a Field,
149	//the Field usually takes over the StringReader and deletes it on completion.
150	//If reference counting is enabled, the wrapper can add a reference to any class
151	//and when _CLDECDELETE is called, the reference is decremented and the object is only deleted
152	//if the refcount is zero.
153	#define LUCENE_ENABLE_REFCOUNT
154
155
156	////////////////////////////////////////////////////////////////////
157	// These options allow you to remove certain implementations
158	// out of clucene so that they can be implemented in the client
159	// application
160	////////////////////////////////////////////////////////////////////
161	//
162	//define this to your own setting if you would like to implement your own
163	//threading locking code. it should have the same sort of functions as
164	//mutex_default. If not defined, clucene will try and use posix,win32 critical
165	//sections, or a timer based mutex hack.
166	//#define _LUCENE_THREADMUTEX CL_NS(util)::mutex_default
167	//
168	//define this if you want to implement the _Cnd_OutDebug routine yourself
169	//you can then easily customise in your own application how to handle debug messages
170	//#define _CND_DEBUG_DONTIMPLEMENT_OUTDEBUG
171	//
172	//define this if you want to implement your own namespace macros
173	//#define _LUCENE_DONTIMPLEMENT_NS_MACROS
174	//
175	//define this if you do not want clucene to include any standard libraries.
176	//this could be useful if you want to use alternate libraries
177	//#define LUCENE_DISABLE_INCLUDES
178	//
179	////////////////////////////////////////////////////////////////////
180
181
182	////////////////////////////////////////////////////////////////////
183	// These options will be changed depending on your compiler/platform
184	// but can also be changed here if required
185	////////////////////////////////////////////////////////////////////
186	//
187	//define this if multi-threading support is not required
188	//if not defined, multi-thread locking will
189	//occur (and its related processing overhead)
190	//note: it is recommended to disable multithreading if you do not need it
191	//there is a lot of overhead that can be avoided.
192	//#define _CL_DISABLE_MULTITHREADING
193	//
194	//if you want to define your own default file encoding. specify it
195	//here - normally defined in the platform specific headers
196	//#define PLATFORM_DEFAULT_READER_ENCODING CL_NS(util)::FileReader::ENCODING_ASCII
197	//
198	//disable hash implementations (if available)
199	//#define LUCENE_DISABLE_HASHING
200	////////////////////////////////////////////////////////////////////
201
202
203
204	////////////////////////////////////////////////////////////////////
205	// These options should not be changed. But you can experiment with
206	// them to optimize performance
207	////////////////////////////////////////////////////////////////////
208	//
209	//some defaults; these wouldn't usually need to be changed
210	//Buffer size for input/output streams. Required.
211	#define LUCENE_STREAM_BUFFER_SIZE 1024
212	//
213	// DSR:2004.08.19:
214	// Formerly, StringBuffer used 1024 as the default size of its internal buffer.
215	// However, StringBuffer is used primarily for token- and term-oriented
216	// processing, e.g. in StandardTokenizer. I've calculated that the average
217	// token (as produced by StandardTokenizer) in all .txt files distributed in
218	// the Project Gutenberg CD Image (August 2003 release) has only 6 characters.
219	// Although most languages are likely to have a longer average word length than
220	// English due to the popularity of "non-atomized" conjugation and declension
221	// mechanisms, 1024 is still vastly excessive.
222	// I made two changes intended to deliver better overall performance:
223	// a) Switched to a default StringBuffer character capacity of 32. Though 32
224	// is longer than the average token, the high cost of realloc makes a
225	// slightly liberal default size optimal. I chose the default size of 32
226	// after fairly extensive experimentation on the Gutenberg e-texts. The
227	// results are summarized in the following table:
228	// ------------------------------------------------------------------------
229	// LUCENE_DEFAULT_TOKEN_BUFFER_SIZE value | % faster than default size 1024
230	// ------------------------------------------------------------------------
231	// 8 : 4%
232	// 16 : 7%
233	// 32 : 6%
234	// 64 : 3%
235	// A default size of 32 is actually slightly slower than 16, but I was
236	// experimenting on English text; I expect that 32 will maintain decent
237	// performance in languages such as German, and in technical documents
238	// with long tokens.
239	//
240	// b) To offset the switch to a smaller default buffer size, I implemented a
241	// more aggressive growth strategy. A StringBuffer now [at least] doubles
242	// the size of its internal buffer every time it needs to grow, rather
243	// than [at least] increasing by LUCENE_DEFAULT_TOKEN_BUFFER_SIZE no
244	// matter how many times it has already grown.
245	//Default character capacity of a StringBuffer's internal buffer (see the DSR note above). Required.
246	#define LUCENE_DEFAULT_TOKEN_BUFFER_SIZE 32
247	//todo: should implement a similar strategy in analysis/token
248	//
249	//Expert: The fraction of {@link TermDocs} entries stored in skip tables,
250	//used to accelerate {@link TermDocs#skipTo(int)}. Larger values result in
251	//smaller indices, greater acceleration, but fewer accelerable cases, while
252	//smaller values result in bigger indices, less acceleration and more
253	//accelerable cases. More detailed experiments would be useful here.
254	#define LUCENE_DEFAULT_TERMDOCS_SKIP_INTERVAL 16
255	//
256	//Size of the TermScore cache. Required.
257	#define LUCENE_SCORE_CACHE_SIZE 32
258	//
259	//analysis options
260	//maximum word length that the CharTokenizer uses. Required.
261	//By adjusting this value, you can greatly improve the performance of searching
262	//and especially indexing. Default is 255, but smaller numbers will decrease
263	//the amount of memory used as well as increasing the speed.
264	#define LUCENE_MAX_WORD_LEN 255
265	//Maximum length of a token word.
266	//Should be the same or more than LUCENE_MAX_WORD_LEN (here it is set equal to it).
267	//if not defined, then there is no token limit, but it may be slower
268	//if defined it will be faster (up to 15% in some cases), but will use more memory
269	#ifndef LUCENE_OPTIMIZE_FOR_MEMORY // only defined when not optimizing for memory
270	#define LUCENE_TOKEN_WORD_LENGTH LUCENE_MAX_WORD_LEN
271	#endif
272	//
273	//Maximum field length. Some optimisation can be done if a maximum field
274	//length is given... The smaller the better
275	#define LUCENE_MAX_FIELD_LEN 100
276	//
277	//The initial value of BooleanQuery::maxClauseCount. Default is 1024
278	#define LUCENE_BOOLEANQUERY_MAXCLAUSECOUNT 1024
279	//
280	//bvk: 12.3.2005
281	//==============================================================================
282	//Previously the way the tokenizer worked was changed to optionally
283	//use a fixed word length. I have implemented this in the Term class as well.
284	//It seems that predefining the text length instead of using new TCHAR[x]
285	//in the constructor improves the performance by 20-30% for certain
286	//operations.
287	//Maximum length of a term text.
288	//Should be the same or more than LUCENE_MAX_WORD_LEN (here it is set equal to it).
289	//if not defined, then there is no term text limit, but it may be slower
290	//if defined it will be faster (up to 30% in some cases), but will use more memory
291	#ifndef LUCENE_OPTIMIZE_FOR_MEMORY // only defined when not optimizing for memory
292	#define LUCENE_TERM_TEXT_LENGTH LUCENE_MAX_WORD_LEN
293	#endif
294	//
295	//Size of the CharTokenizer buffer. Required.
296	#define LUCENE_IO_BUFFER_SIZE 1024
297	//
298	//the minimum amount the segment term enum should grow by. Must be at least 1
299	#define LUCENE_SEGMENTTERMENUM_GROWSIZE 8
300	//
301	////////////////////////////////////////////////////////////////////
302
303	#endif // _lucene_Config_
304
305