1 | /*------------------------------------------------------------------------------ |
2 | * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team |
3 | * |
4 | * Distributable under the terms of either the Apache License (Version 2.0) or |
5 | * the GNU Lesser General Public License, as specified in the COPYING file. |
6 | ------------------------------------------------------------------------------*/ |
7 | #ifndef _lucene_Config_ |
8 | #define _lucene_Config_ |
9 | |
10 | |
11 | //////////////////////////////////////////////////////////////////// |
12 | // these settings should be set up in the compiler, |
13 | // but are put here for reference as to what could be defined |
14 | //////////////////////////////////////////////////////////////////// |
15 | // |
16 | //define this if you want debugging code to be enabled |
17 | //#define _DEBUG |
18 | // |
19 | //condition debugging (_CL__CND_DEBUG) is enabled automatically whenever _DEBUG is defined; it may also be predefined in the compiler settings |
20 | #if defined(_DEBUG) && !defined(_CL__CND_DEBUG) |
21 | #define _CL__CND_DEBUG |
22 | #endif |
23 | // |
24 | //define this to print out lots of information about merges, etc |
25 | //requires _CL__CND_DEBUG to be defined |
26 | //#define _CL_DEBUG_INFO stdout |
27 | // |
28 | //to disable namespaces define this |
29 | //#define DISABLE_NAMESPACE |
30 | // |
31 | //This is mostly for windows. If you have put the google sparsehash |
32 | //code (which provides dense_hash_map) in your include path somewhere, |
33 | //then define this to use it. |
34 | //However, for msvc, there are no significant gains since there |
35 | //is already a compatible hashmap available. |
36 | //#define _CL_HAVE_GOOGLE_DENSE_HASH_MAP |
37 | // |
38 | //////////////////////////////////////////////////////////////////// |
39 | |
40 | //////////////////////////////////////////////////////////////////// |
41 | // These options can be set depending on the particular needs of |
42 | // Your application |
43 | //////////////////////////////////////////////////////////////////// |
44 | // |
45 | //define this to force the build into ascii mode |
46 | //#define _ASCII |
47 | // |
48 | //define this to force the build into ucs2 mode |
49 | //#define _UCS2 |
50 | // |
//Fallback used when a wide character is being converted to an ascii character
//and it cannot fit: yields the low 8 bits of the character value as a char.
//The argument is parenthesized so that a compound expression (for example a
//conditional expression) is converted as a whole rather than only its first
//operand. Required.
#define LUCENE_OOR_CHAR(c) ((char)(((unsigned short)(c))&0xFF))
54 | // |
55 | //define this to force clucene to use its internal character functions |
56 | //(this file defines it by default). |
57 | //Tests may display unpredictable behaviour if this is not defined. |
58 | #define LUCENE_USE_INTERNAL_CHAR_FUNCTIONS |
59 | // |
60 | //define this to enable mmap support in the fsdirectory IndexInput |
61 | //todo: only available for windows so far...need to add MMapInput.cpp to project |
62 | //EXPERIMENTAL |
63 | //#define LUCENE_FS_MMAP |
64 | // |
65 | //LOCK_DIR implementation: |
66 | //define this to set an exact directory for the lock dir (not recommended) |
67 | //all other methods of getting the temporary directory will be ignored |
68 | //#define LUCENE_LOCK_DIR "/tmp" |
69 | // |
70 | //define this to try to load the lock dir from the specified environment variable |
71 | #define LUCENE_LOCK_DIR_ENV_1 "TEMP" |
72 | //define this to look up a second environment variable if the first one fails |
73 | #define LUCENE_LOCK_DIR_ENV_2 "TMP" |
74 | //define this if you want to have a fallback directory; if not defined then |
75 | //the lock directory will be the index directory |
76 | #define LUCENE_LOCK_DIR_ENV_FALLBACK "/tmp" |
77 | // |
78 | //////////////////////////////////////////////////////////////////// |
79 | |
80 | |
81 | |
82 | //////////////////////////////////////////////////////////////////// |
83 | // The following are search query options |
84 | // The NO_* options can make CLucene faster and/or smaller |
85 | // special queries sometime require longer search times or may |
86 | // not be required |
87 | //////////////////////////////////////////////////////////////////// |
88 | // |
89 | //Define this to remove fuzzy query and sloppy scoring |
90 | //#define NO_FUZZY_QUERY |
91 | // |
92 | //Define to remove wildcard t*m or te?m to match term |
93 | //#define NO_WILDCARD_QUERY |
94 | // |
95 | //Define to remove prefix term query - ter* to match term or terms |
96 | //#define NO_PREFIX_QUERY |
97 | // |
98 | //Define to remove range (exclusive and inclusive) |
99 | //#define NO_RANGE_QUERY |
100 | // |
101 | //These must always be defined. They can be adjusted if required, but |
102 | //the usual Wildcard string is '*' and the usual Wildcard Char is '?'. |
103 | //Both are Required. Note that both values are single character literals. |
104 | #define LUCENE_WILDCARDTERMENUM_WILDCARD_STRING '*' |
105 | #define LUCENE_WILDCARDTERMENUM_WILDCARD_CHAR '?' |
106 | // |
107 | //////////////////////////////////////////////////////////////////// |
108 | |
109 | //////////////////////////////////////////////////////////////////// |
110 | // memory handling configurations |
111 | //////////////////////////////////////////////////////////////////// |
112 | // |
113 | //If this is defined, lucene's configuration is changed to use less memory, |
114 | //but may run slower. In this file it suppresses the definitions of LUCENE_TOKEN_WORD_LENGTH and LUCENE_TERM_TEXT_LENGTH. |
115 | //todo: I don't think this actually changes speed much, just memory |
116 | #define LUCENE_OPTIMIZE_FOR_MEMORY |
117 | // |
118 | //define LUCENE_ENABLE_MEMLEAKTRACKING if you want the pointer tracking to be enabled; |
119 | //this is a useful tool for memory leak tracking. It is defined automatically when _DEBUG is defined, unless LUCENE_DISABLE_MEMTRACKING is defined. |
120 | //The LuceneBase can slow down the code a *lot* |
121 | #if defined(_DEBUG) |
122 | #if !defined(LUCENE_DISABLE_MEMTRACKING) && !defined(LUCENE_ENABLE_MEMLEAKTRACKING) |
123 | #define LUCENE_ENABLE_MEMLEAKTRACKING |
124 | #endif |
125 | #endif |
126 | // |
127 | //enable use of rich file/line tracking (defined automatically whenever LUCENE_ENABLE_MEMLEAKTRACKING is defined). use CL_FILELINE to pass |
128 | //to functions like stringDuplicate (or use CL_STRDUP* functions instead) and |
129 | //CLStringIntern::x. |
130 | #if defined(LUCENE_ENABLE_MEMLEAKTRACKING) |
131 | #define LUCENE_ENABLE_FILELINEINFO |
132 | #endif |
133 | // |
134 | //enable creation of clucene.log file. Logs every |
135 | //call to new operator. Must have LUCENE_ENABLE_MEMLEAKTRACKING and _DEBUG defined; uncomment the define below to turn it on. |
136 | //writes log in this format: |
137 | //action,file name,file line,allocation size |
138 | //logging can be disabled by setting _lucene_disable_debuglogging to true |
139 | #if defined(LUCENE_ENABLE_MEMLEAKTRACKING) && defined(_DEBUG) |
140 | //#define LUCENE_ENABLE_CONSTRUCTOR_LOG |
141 | #endif |
142 | // |
143 | // |
144 | //define this if you want to enable reference counting (this file defines it by default). This is |
145 | //not necessary or useful in most cases except when implementing wrappers |
146 | //which have reference counting. If the wrapper wraps a StringReader, |
147 | //for example, it should expect that the wrapped StringReader should not |
148 | //be deleted. However, when the StringReader is added into a Field, |
149 | //the Field usually takes over the StringReader and deletes it on completion. |
150 | //If reference counting is enabled, the wrapper can add a reference to any class |
151 | //and when _CLDECDELETE is called, the reference is decremented and only deleted |
152 | //if the refcount is zero. |
153 | #define LUCENE_ENABLE_REFCOUNT |
154 | |
155 | |
156 | //////////////////////////////////////////////////////////////////// |
157 | // These options allow you to remove certain implementations |
158 | // out of clucene so that they can be implemented in the client |
159 | // application |
160 | //////////////////////////////////////////////////////////////////// |
161 | // |
162 | //define this to your own setting if you would like to implement your own |
163 | //threading locking code. it should have the same sort of functions as |
164 | //mutex_default. If not defined, clucene will try and use posix,win32 critical |
165 | //sections, or a timer based mutex hack. |
166 | //#define _LUCENE_THREADMUTEX CL_NS(util)::mutex_default |
167 | // |
168 | //define this if you want to implement the _Cnd_OutDebug routine yourself |
169 | //you can then easily customise in your own application how to handle debug messages |
170 | //#define _CND_DEBUG_DONTIMPLEMENT_OUTDEBUG |
171 | // |
172 | //define this if you want to implement your own namespace macros |
173 | //#define _LUCENE_DONTIMPLEMENT_NS_MACROS |
174 | // |
175 | //define this if you do not want clucene to include any standard libraries. |
176 | //this could be useful if you want to use alternate libraries |
177 | //#define LUCENE_DISABLE_INCLUDES |
178 | // |
179 | //////////////////////////////////////////////////////////////////// |
180 | |
181 | |
182 | //////////////////////////////////////////////////////////////////// |
183 | // These options will be changed depending on your compiler/platform |
184 | // but can also be changed here if required |
185 | //////////////////////////////////////////////////////////////////// |
186 | // |
187 | //define this if multi-threading support is not required |
188 | //if not defined, multi-thread locking will |
189 | //occur (and its related processing overhead) |
190 | //note: it is recommended to disable multithreading if you do not need it |
191 | //there is a lot of overhead that can be avoided. |
192 | //#define _CL_DISABLE_MULTITHREADING |
193 | // |
194 | //if you want to define your own default file encoding. specify it |
195 | //here - normally defined in the platform specific headers |
196 | //#define PLATFORM_DEFAULT_READER_ENCODING CL_NS(util)::FileReader::ENCODING_ASCII |
197 | // |
198 | //disable hash implementations (if available) |
199 | //#define LUCENE_DISABLE_HASHING |
200 | //////////////////////////////////////////////////////////////////// |
201 | |
202 | |
203 | |
204 | //////////////////////////////////////////////////////////////////// |
205 | // These options should not be changed. But you can experiment with |
206 | // them to optimize performance |
207 | //////////////////////////////////////////////////////////////////// |
208 | // |
209 | //Some defaults that would not usually need to be changed. |
210 | //Buffer size for input/output streams. Required. |
211 | #define LUCENE_STREAM_BUFFER_SIZE 1024 |
212 | // |
213 | // DSR:2004.08.19: |
214 | // Formerly, StringBuffer used 1024 as the default size of its internal buffer. |
215 | // However, StringBuffer is used primarily for token- and term-oriented |
216 | // processing, e.g. in StandardTokenizer. I've calculated that the average |
217 | // token (as produced by StandardTokenizer) in all .txt files distributed in |
218 | // the Project Gutenberg CD Image (August 2003 release) has only 6 characters. |
219 | // Although most languages are likely to have a longer average word length than |
220 | // English due to the popularity of "non-atomized" conjugation and declension |
221 | // mechanisms, 1024 is still vastly excessive. |
222 | // I made two changes intended to deliver better overall performance: |
223 | // a) Switched to a default StringBuffer character capacity of 32. Though 32 |
224 | // is longer than the average token, the high cost of realloc makes a |
225 | // slightly liberal default size optimal. I chose the default size of 32 |
226 | // after fairly extensive experimentation on the Gutenberg e-texts. The |
227 | // results are summarized in the following table: |
228 | // ------------------------------------------------------------------------ |
229 | // LUCENE_DEFAULT_TOKEN_BUFFER_SIZE value | % faster than default size 1024 |
230 | // ------------------------------------------------------------------------ |
231 | // 8 : 4% |
232 | // 16 : 7% |
233 | // 32 : 6% |
234 | // 64 : 3% |
235 | // A default size of 32 is actually slightly slower than 16, but I was |
236 | // experimenting on English text; I expect that 32 will maintain decent |
237 | // performance in languages such as German, and in technical documents |
238 | // with long tokens. |
239 | // |
240 | // b) To offset the switch to a smaller default buffer size, I implemented a |
241 | // more aggressive growth strategy. A StringBuffer now [at least] doubles |
242 | // the size of its internal buffer every time it needs to grow, rather |
243 | // than [at least] increasing by LUCENE_DEFAULT_TOKEN_BUFFER_SIZE no |
244 | // matter how many times it has already grown. |
245 | //Default character capacity of a StringBuffer's internal buffer (see the DSR note above). Required. |
246 | #define LUCENE_DEFAULT_TOKEN_BUFFER_SIZE 32 |
247 | //todo: should implement a similar strategy in analysis/token |
248 | // |
249 | //Expert: The fraction of {@link TermDocs} entries stored in skip tables, |
250 | //used to accelerate {@link TermDocs#skipTo(int)}. Larger values result in |
251 | //smaller indices, greater acceleration, but fewer accelerable cases, while |
252 | //smaller values result in bigger indices, less acceleration and more |
253 | //accelerable cases. More detailed experiments would be useful here. |
254 | #define LUCENE_DEFAULT_TERMDOCS_SKIP_INTERVAL 16 |
255 | // |
256 | //Size of the TermScore cache. Required. |
257 | #define LUCENE_SCORE_CACHE_SIZE 32 |
258 | // |
259 | //analysis options |
260 | //maximum length that the CharTokenizer uses. Required. |
261 | //By adjusting this value, you can greatly improve the performance of searching |
262 | //and especially indexing. Default is 255, but smaller numbers will decrease |
263 | //the amount of memory used as well as increasing the speed. |
264 | #define LUCENE_MAX_WORD_LEN 255 |
265 | //Maximum length of a token word. |
266 | //Should be the same or more than LUCENE_MAX_WORD_LEN |
267 | //if not defined, then no token limit, but may be slower |
268 | //if defined will be faster (up to 15% in some cases), but will use more memory; it is therefore only defined when LUCENE_OPTIMIZE_FOR_MEMORY is not defined |
269 | #ifndef LUCENE_OPTIMIZE_FOR_MEMORY |
270 | #define LUCENE_TOKEN_WORD_LENGTH LUCENE_MAX_WORD_LEN |
271 | #endif |
272 | // |
273 | //Maximum field length. Some optimisation can be done if a maximum field |
274 | //length is given; the smaller the better. NOTE(review): not clear from this file whether this bounds field names or field contents - confirm |
275 | #define LUCENE_MAX_FIELD_LEN 100 |
276 | // |
277 | //The initial value of BooleanQuery::maxClauseCount. Default is 1024 |
278 | #define LUCENE_BOOLEANQUERY_MAXCLAUSECOUNT 1024 |
279 | // |
280 | //bvk: 12.3.2005 |
281 | //============================================================================== |
282 | //Previously the way the tokenizer has worked has been changed to optionally |
283 | //use a fixed word length. I have implemented this in the Term class as well. |
284 | //It seems that predefining the text length instead of using new TCHAR[x] |
285 | //in the constructor greatly improves the performance by 20-30% for certain |
286 | //operations. |
287 | //Maximum length of a term text. |
288 | //Should be the same or more than LUCENE_MAX_WORD_LEN |
289 | //if not defined, then no term text limit, but may be slower |
290 | //if defined will be faster (up to 30% in some cases), but will use more memory; it is therefore only defined when LUCENE_OPTIMIZE_FOR_MEMORY is not defined |
291 | #ifndef LUCENE_OPTIMIZE_FOR_MEMORY |
292 | #define LUCENE_TERM_TEXT_LENGTH LUCENE_MAX_WORD_LEN |
293 | #endif |
294 | // |
295 | //Size of the CharTokenizer buffer. Required. |
296 | #define LUCENE_IO_BUFFER_SIZE 1024 |
297 | // |
298 | //The minimum amount the segment term enum should grow by. Must be at least 1. |
299 | #define LUCENE_SEGMENTTERMENUM_GROWSIZE 8 |
300 | // |
301 | //////////////////////////////////////////////////////////////////// |
302 | |
303 | #endif |
304 | |
305 | |