1 | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
2 | /* |
3 | * This file is part of the LibreOffice project. |
4 | * |
5 | * This Source Code Form is subject to the terms of the Mozilla Public |
6 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
7 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8 | * |
9 | * This file incorporates work covered by the following license notice: |
10 | * |
11 | * Licensed to the Apache Software Foundation (ASF) under one or more |
12 | * contributor license agreements. See the NOTICE file distributed |
13 | * with this work for additional information regarding copyright |
14 | * ownership. The ASF licenses this file to you under the Apache |
15 | * License, Version 2.0 (the "License"); you may not use this file |
16 | * except in compliance with the License. You may obtain a copy of |
17 | * the License at http://www.apache.org/licenses/LICENSE-2.0 . |
18 | */ |
19 | |
#include <com/sun/star/uno/Reference.h>
#include <com/sun/star/linguistic2/XSearchableDictionaryList.hpp>

#include <cppuhelper/factory.hxx>
#include <cppuhelper/supportsservice.hxx>
#include <com/sun/star/registry/XRegistryKey.hpp>
#include <i18nlangtag/languagetag.hxx>
#include <tools/debug.hxx>
#include <osl/mutex.hxx>

#include <hyphen.h>
#include <hyphenimp.hxx>

#include <linguistic/hyphdta.hxx>
#include <rtl/ustring.hxx>
#include <rtl/ustrbuf.hxx>
#include <rtl/textenc.h>

#include <linguistic/lngprops.hxx>
#include <linguistic/misc.hxx>
#include <unotools/pathoptions.hxx>
#include <unotools/useroptions.hxx>
#include <unotools/lingucfg.hxx>
#include <osl/file.hxx>

#include <stdio.h>
#include <string.h>

#include <list>
#include <set>
#include <vector>
50 | |
51 | using namespace utl; |
52 | using namespace osl; |
53 | using namespace com::sun::star; |
54 | using namespace com::sun::star::beans; |
55 | using namespace com::sun::star::lang; |
56 | using namespace com::sun::star::uno; |
57 | using namespace com::sun::star::linguistic2; |
58 | using namespace linguistic; |
59 | |
60 | |
// Max(a,b): yields the larger of its two arguments.
// Both arguments and the whole expansion are parenthesized so the macro
// composes correctly with operands containing operators of lower precedence
// than '>' (for example '&' or '?:').
// NOTE: this is still a macro, so the selected argument is evaluated twice;
// do not pass expressions with side effects.
#define Max(a,b) ((a) > (b) ? (a) : (b))
63 | |
64 | /////////////////////////////////////////////////////////////////////////// |
65 | |
66 | |
67 | Hyphenator::Hyphenator() : |
68 | aEvtListeners ( GetLinguMutex() ) |
69 | { |
70 | bDisposing = false; |
71 | pPropHelper = NULL; |
72 | aDicts = NULL; |
73 | numdict = 0; |
74 | } |
75 | |
76 | Hyphenator::~Hyphenator() |
77 | { |
78 | if (numdict && aDicts) |
79 | { |
80 | for (int i=0; i < numdict; ++i) |
81 | { |
82 | delete aDicts[i].apCC; |
83 | if (aDicts[i].aPtr) |
84 | hnj_hyphen_free(aDicts[i].aPtr); |
85 | } |
86 | } |
87 | delete[] aDicts; |
88 | |
89 | if (pPropHelper) |
90 | { |
91 | pPropHelper->RemoveAsPropListener(); |
92 | delete pPropHelper; |
93 | } |
94 | } |
95 | |
96 | PropertyHelper_Hyphenation& Hyphenator::GetPropHelper_Impl() |
97 | { |
98 | if (!pPropHelper) |
99 | { |
100 | Reference< XLinguProperties > xPropSet( GetLinguProperties(), UNO_QUERY ); |
101 | |
102 | pPropHelper = new PropertyHelper_Hyphenation ((XHyphenator *) this, xPropSet ); |
103 | pPropHelper->AddAsPropListener(); //! after a reference is established |
104 | } |
105 | return *pPropHelper; |
106 | } |
107 | |
108 | |
109 | Sequence< Locale > SAL_CALL Hyphenator::getLocales() |
110 | throw(RuntimeException) |
111 | { |
112 | MutexGuard aGuard( GetLinguMutex() ); |
113 | |
114 | // this routine should return the locales supported by the installed |
115 | // dictionaries. |
116 | |
117 | if (!numdict) |
118 | { |
119 | SvtLinguConfig aLinguCfg; |
120 | |
121 | // get list of dictionaries-to-use |
122 | // (or better speaking: the list of dictionaries using the |
123 | // new configuration entries). |
124 | std::list< SvtLinguConfigDictionaryEntry > aDics; |
125 | uno::Sequence< OUString > aFormatList; |
126 | aLinguCfg.GetSupportedDictionaryFormatsFor( "Hyphenators" , |
127 | "org.openoffice.lingu.LibHnjHyphenator" , aFormatList ); |
128 | sal_Int32 nLen = aFormatList.getLength(); |
129 | for (sal_Int32 i = 0; i < nLen; ++i) |
130 | { |
131 | std::vector< SvtLinguConfigDictionaryEntry > aTmpDic( |
132 | aLinguCfg.GetActiveDictionariesByFormat( aFormatList[i] ) ); |
133 | aDics.insert( aDics.end(), aTmpDic.begin(), aTmpDic.end() ); |
134 | } |
135 | |
136 | //!! for compatibility with old dictionaries (the ones not using extensions |
137 | //!! or new configuration entries, but still using the dictionary.lst file) |
138 | //!! Get the list of old style spell checking dictionaries to use... |
139 | std::vector< SvtLinguConfigDictionaryEntry > aOldStyleDics( |
140 | GetOldStyleDics( "HYPH" ) ); |
141 | |
142 | // to prefer dictionaries with configuration entries we will only |
143 | // use those old style dictionaries that add a language that |
144 | // is not yet supported by the list od new style dictionaries |
145 | MergeNewStyleDicsAndOldStyleDics( aDics, aOldStyleDics ); |
146 | |
147 | numdict = aDics.size(); |
148 | if (numdict) |
149 | { |
150 | // get supported locales from the dictionaries-to-use... |
151 | sal_Int32 k = 0; |
152 | std::set< OUString, lt_rtl_OUString > aLocaleNamesSet; |
153 | std::list< SvtLinguConfigDictionaryEntry >::const_iterator aDictIt; |
154 | for (aDictIt = aDics.begin(); aDictIt != aDics.end(); ++aDictIt) |
155 | { |
156 | uno::Sequence< OUString > aLocaleNames( aDictIt->aLocaleNames ); |
157 | sal_Int32 nLen2 = aLocaleNames.getLength(); |
158 | for (k = 0; k < nLen2; ++k) |
159 | { |
160 | aLocaleNamesSet.insert( aLocaleNames[k] ); |
161 | } |
162 | } |
163 | // ... and add them to the resulting sequence |
164 | aSuppLocales.realloc( aLocaleNamesSet.size() ); |
165 | std::set< OUString, lt_rtl_OUString >::const_iterator aItB; |
166 | k = 0; |
167 | for (aItB = aLocaleNamesSet.begin(); aItB != aLocaleNamesSet.end(); ++aItB) |
168 | { |
169 | Locale aTmp( LanguageTag::convertToLocale( *aItB )); |
170 | aSuppLocales[k++] = aTmp; |
171 | } |
172 | |
173 | //! For each dictionary and each locale we need a separate entry. |
174 | //! If this results in more than one dictionary per locale than (for now) |
175 | //! it is undefined which dictionary gets used. |
176 | //! In the future the implementation should support using several dictionaries |
177 | //! for one locale. |
178 | numdict = 0; |
179 | for (aDictIt = aDics.begin(); aDictIt != aDics.end(); ++aDictIt) |
180 | numdict = numdict + aDictIt->aLocaleNames.getLength(); |
181 | |
182 | // add dictionary information |
183 | aDicts = new HDInfo[numdict]; |
184 | |
185 | k = 0; |
186 | for (aDictIt = aDics.begin(); aDictIt != aDics.end(); ++aDictIt) |
187 | { |
188 | if (aDictIt->aLocaleNames.getLength() > 0 && |
189 | aDictIt->aLocations.getLength() > 0) |
190 | { |
191 | uno::Sequence< OUString > aLocaleNames( aDictIt->aLocaleNames ); |
192 | sal_Int32 nLocales = aLocaleNames.getLength(); |
193 | |
194 | // currently only one language per dictionary is supported in the actual implementation... |
195 | // Thus here we work-around this by adding the same dictionary several times. |
196 | // Once for each of it's supported locales. |
197 | for (sal_Int32 i = 0; i < nLocales; ++i) |
198 | { |
199 | LanguageTag aLanguageTag( aDictIt->aLocaleNames[i] ); |
200 | aDicts[k].aPtr = NULL; |
201 | aDicts[k].eEnc = RTL_TEXTENCODING_DONTKNOW; |
202 | aDicts[k].aLoc = aLanguageTag.getLocale(); |
203 | aDicts[k].apCC = new CharClass( aLanguageTag ); |
204 | // also both files have to be in the same directory and the |
205 | // file names must only differ in the extension (.aff/.dic). |
206 | // Thus we use the first location only and strip the extension part. |
207 | OUString aLocation = aDictIt->aLocations[0]; |
208 | sal_Int32 nPos = aLocation.lastIndexOf( '.' ); |
209 | aLocation = aLocation.copy( 0, nPos ); |
210 | aDicts[k].aName = aLocation; |
211 | |
212 | ++k; |
213 | } |
214 | } |
215 | } |
216 | DBG_ASSERT( k == numdict, "index mismatch?" ); |
217 | } |
218 | else |
219 | { |
220 | /* no dictionary found so register no dictionaries */ |
221 | numdict = 0; |
222 | aDicts = NULL; |
223 | aSuppLocales.realloc(0); |
224 | } |
225 | } |
226 | |
227 | return aSuppLocales; |
228 | } |
229 | |
230 | |
231 | |
232 | sal_Bool SAL_CALL Hyphenator::hasLocale(const Locale& rLocale) |
233 | throw(RuntimeException) |
234 | { |
235 | MutexGuard aGuard( GetLinguMutex() ); |
236 | |
237 | sal_Bool bRes = sal_False; |
238 | if (!aSuppLocales.getLength()) |
239 | getLocales(); |
240 | |
241 | const Locale *pLocale = aSuppLocales.getConstArray(); |
242 | sal_Int32 nLen = aSuppLocales.getLength(); |
243 | for (sal_Int32 i = 0; i < nLen; ++i) |
244 | { |
245 | if (rLocale == pLocale[i]) |
246 | { |
247 | bRes = sal_True; |
248 | break; |
249 | } |
250 | } |
251 | return bRes; |
252 | } |
253 | |
254 | |
255 | Reference< XHyphenatedWord > SAL_CALL Hyphenator::hyphenate( const OUString& aWord, |
256 | const ::com::sun::star::lang::Locale& aLocale, |
257 | sal_Int16 nMaxLeading, |
258 | const ::com::sun::star::beans::PropertyValues& aProperties ) |
259 | throw (com::sun::star::uno::RuntimeException, com::sun::star::lang::IllegalArgumentException) |
260 | { |
261 | int nHyphenationPos = -1; |
262 | int nHyphenationPosAlt = -1; |
263 | int nHyphenationPosAltHyph = -1; |
264 | int wordlen; |
265 | char *hyphens; |
266 | char *lcword; |
267 | int k = 0; |
268 | |
269 | PropertyHelper_Hyphenation& rHelper = GetPropHelper(); |
270 | rHelper.SetTmpPropVals(aProperties); |
271 | sal_Int16 minTrail = rHelper.GetMinTrailing(); |
272 | sal_Int16 minLead = rHelper.GetMinLeading(); |
273 | sal_Int16 minLen = rHelper.GetMinWordLength(); |
274 | |
275 | HyphenDict *dict = NULL; |
276 | rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW; |
277 | CharClass * pCC = NULL; |
278 | |
279 | Reference< XHyphenatedWord > xRes; |
280 | |
281 | k = -1; |
282 | for (int j = 0; j < numdict; j++) |
283 | { |
284 | if (aLocale == aDicts[j].aLoc) |
285 | k = j; |
286 | } |
287 | |
288 | // if we have a hyphenation dictionary matching this locale |
289 | if (k != -1) |
290 | { |
291 | // if this dictinary has not been loaded yet do that |
292 | if (!aDicts[k].aPtr) |
293 | { |
294 | OUString DictFN = aDicts[k].aName + ".dic" ; |
295 | OUString dictpath; |
296 | |
297 | osl::FileBase::getSystemPathFromFileURL( DictFN, dictpath ); |
298 | OString sTmp( OU2ENC( dictpath, osl_getThreadTextEncoding() ) ); |
299 | |
300 | #if defined(WNT) |
301 | // workaround for Windows specific problem that the |
302 | // path length in calls to 'fopen' is limted to somewhat |
303 | // about 120+ characters which will usually be exceed when |
304 | // using dictionaries as extensions. |
305 | sTmp = Win_GetShortPathName( dictpath ); |
306 | #endif |
307 | |
308 | if ( ( dict = hnj_hyphen_load ( sTmp.getStr()) ) == NULL ) |
309 | { |
310 | fprintf(stderr, "Couldn't find file %s\n" , OU2ENC(dictpath, osl_getThreadTextEncoding()) ); |
311 | return NULL; |
312 | } |
313 | aDicts[k].aPtr = dict; |
314 | aDicts[k].eEnc = getTextEncodingFromCharset(dict->cset); |
315 | } |
316 | |
317 | // other wise hyphenate the word with that dictionary |
318 | dict = aDicts[k].aPtr; |
319 | eEnc = aDicts[k].eEnc; |
320 | pCC = aDicts[k].apCC; |
321 | |
322 | // we don't want to work with a default text encoding since following incorrect |
323 | // results may occur only for specific text and thus may be hard to notice. |
324 | // Thus better always make a clean exit here if the text encoding is in question. |
325 | // Hopefully something not working at all will raise proper attention quickly. ;-) |
326 | DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" ); |
327 | if (eEnc == RTL_TEXTENCODING_DONTKNOW) |
328 | return NULL; |
329 | |
330 | sal_uInt16 ct = capitalType(aWord, pCC); |
331 | |
332 | // first convert any smart quotes or apostrophes to normal ones |
333 | OUStringBuffer rBuf(aWord); |
334 | sal_Int32 nc = rBuf.getLength(); |
335 | sal_Unicode ch; |
336 | for (sal_Int32 ix=0; ix < nc; ix++) |
337 | { |
338 | ch = rBuf[ix]; |
339 | if ((ch == 0x201C) || (ch == 0x201D)) |
340 | rBuf[ix] = (sal_Unicode)0x0022; |
341 | if ((ch == 0x2018) || (ch == 0x2019)) |
342 | rBuf[ix] = (sal_Unicode)0x0027; |
343 | } |
344 | OUString nWord(rBuf.makeStringAndClear()); |
345 | |
346 | // now convert word to all lowercase for pattern recognition |
347 | OUString nTerm(makeLowerCase(nWord, pCC)); |
348 | |
349 | // now convert word to needed encoding |
350 | OString encWord(OU2ENC(nTerm,eEnc)); |
351 | |
352 | wordlen = encWord.getLength(); |
353 | lcword = new char[wordlen + 1]; |
354 | hyphens = new char[wordlen + 5]; |
355 | |
356 | char ** rep = NULL; // replacements of discretionary hyphenation |
357 | int * pos = NULL; // array of [hyphenation point] minus [deletion position] |
358 | int * cut = NULL; // length of deletions in original word |
359 | |
360 | // copy converted word into simple char buffer |
361 | strcpy(lcword,encWord.getStr()); |
362 | |
363 | // now strip off any ending periods |
364 | int n = wordlen-1; |
365 | while((n >=0) && (lcword[n] == '.')) |
366 | n--; |
367 | n++; |
368 | if (n > 0) |
369 | { |
370 | const bool bFailed = 0 != hnj_hyphen_hyphenate3( dict, lcword, n, hyphens, NULL, |
371 | &rep, &pos, &cut, minLead, minTrail, |
372 | Max(dict->clhmin, Max(dict->clhmin, 2) + Max(0, minLead - Max(dict->lhmin, 2))), |
373 | Max(dict->crhmin, Max(dict->crhmin, 2) + Max(0, minTrail - Max(dict->rhmin, 2))) ); |
374 | if (bFailed) |
375 | { |
376 | //whoops something did not work |
377 | delete[] hyphens; |
378 | delete[] lcword; |
379 | if (rep) |
380 | { |
381 | for(int j = 0; j < n; j++) |
382 | { |
383 | if (rep[j]) free(rep[j]); |
384 | } |
385 | free(rep); |
386 | } |
387 | if (pos) free(pos); |
388 | if (cut) free(cut); |
389 | return NULL; |
390 | } |
391 | } |
392 | |
393 | // now backfill hyphens[] for any removed trailing periods |
394 | for (int c = n; c < wordlen; c++) hyphens[c] = '0'; |
395 | hyphens[wordlen] = '\0'; |
396 | |
397 | sal_Int32 Leading = GetPosInWordToCheck( aWord, nMaxLeading ); |
398 | |
399 | for (sal_Int32 i = 0; i < n; i++) |
400 | { |
401 | int leftrep = 0; |
402 | sal_Bool hit = (n >= minLen); |
403 | if (!rep || !rep[i] || (i >= n)) |
404 | { |
405 | hit = hit && (hyphens[i]&1) && (i < Leading); |
406 | hit = hit && (i >= (minLead-1) ); |
407 | hit = hit && ((n - i - 1) >= minTrail); |
408 | } |
409 | else |
410 | { |
411 | // calculate change character length before hyphenation point signed with '=' |
412 | for (char * c = rep[i]; *c && (*c != '='); c++) |
413 | { |
414 | if (eEnc == RTL_TEXTENCODING_UTF8) |
415 | { |
416 | if (((unsigned char) *c) >> 6 != 2) |
417 | leftrep++; |
418 | } |
419 | else |
420 | leftrep++; |
421 | } |
422 | hit = hit && (hyphens[i]&1) && ((i + leftrep - pos[i]) < Leading); |
423 | hit = hit && ((i + leftrep - pos[i]) >= (minLead-1) ); |
424 | hit = hit && ((n - i - 1 + sal::static_int_cast< sal_sSize >(strlen(rep[i])) - leftrep - 1) >= minTrail); |
425 | } |
426 | if (hit) |
427 | { |
428 | nHyphenationPos = i; |
429 | if (rep && (i < n) && rep[i]) |
430 | { |
431 | nHyphenationPosAlt = i - pos[i]; |
432 | nHyphenationPosAltHyph = i + leftrep - pos[i]; |
433 | } |
434 | } |
435 | } |
436 | |
437 | if (nHyphenationPos == -1) |
438 | { |
439 | xRes = NULL; |
440 | } |
441 | else |
442 | { |
443 | if (rep && rep[nHyphenationPos]) |
444 | { |
445 | // remove equal sign |
446 | char * s = rep[nHyphenationPos]; |
447 | int eq = 0; |
448 | for (; *s; s++) |
449 | { |
450 | if (*s == '=') eq = 1; |
451 | if (eq) *s = *(s + 1); |
452 | } |
453 | OUString repHyphlow(rep[nHyphenationPos], strlen(rep[nHyphenationPos]), eEnc); |
454 | OUString repHyph; |
455 | switch (ct) |
456 | { |
457 | case CAPTYPE_ALLCAP: |
458 | { |
459 | repHyph = makeUpperCase(repHyphlow, pCC); |
460 | break; |
461 | } |
462 | case CAPTYPE_INITCAP: |
463 | { |
464 | if (nHyphenationPosAlt == -1) |
465 | repHyph = makeInitCap(repHyphlow, pCC); |
466 | else |
467 | repHyph = repHyphlow; |
468 | break; |
469 | } |
470 | default: |
471 | { |
472 | repHyph = repHyphlow; |
473 | break; |
474 | } |
475 | } |
476 | |
477 | // handle shortening |
478 | sal_Int16 nPos = (sal_Int16) ((nHyphenationPosAltHyph < nHyphenationPos) ? |
479 | nHyphenationPosAltHyph : nHyphenationPos); |
480 | // dicretionary hyphenation |
481 | xRes = HyphenatedWord::CreateHyphenatedWord( aWord, LinguLocaleToLanguage( aLocale ), nPos, |
482 | aWord.replaceAt(nHyphenationPosAlt + 1, cut[nHyphenationPos], repHyph), |
483 | (sal_Int16) nHyphenationPosAltHyph); |
484 | } |
485 | else |
486 | { |
487 | xRes = HyphenatedWord::CreateHyphenatedWord( aWord, LinguLocaleToLanguage( aLocale ), |
488 | (sal_Int16)nHyphenationPos, aWord, (sal_Int16) nHyphenationPos); |
489 | } |
490 | } |
491 | |
492 | delete[] lcword; |
493 | delete[] hyphens; |
494 | if (rep) |
495 | { |
496 | for(int j = 0; j < n; j++) |
497 | { |
498 | if (rep[j]) free(rep[j]); |
499 | } |
500 | free(rep); |
501 | } |
502 | if (pos) free(pos); |
503 | if (cut) free(cut); |
504 | return xRes; |
505 | } |
506 | return NULL; |
507 | } |
508 | |
509 | |
510 | Reference < XHyphenatedWord > SAL_CALL Hyphenator::queryAlternativeSpelling( |
511 | const OUString& aWord, |
512 | const ::com::sun::star::lang::Locale& aLocale, |
513 | sal_Int16 nIndex, |
514 | const ::com::sun::star::beans::PropertyValues& aProperties ) |
515 | throw(::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException) |
516 | { |
517 | // Firstly we allow only one plus character before the hyphen to avoid to miss the right break point: |
518 | for (int = 1; extrachar <= 2; extrachar++) |
519 | { |
520 | Reference< XHyphenatedWord > xRes = hyphenate(aWord, aLocale, nIndex + 1 + extrachar, aProperties); |
521 | if (xRes.is() && xRes->isAlternativeSpelling() && xRes->getHyphenationPos() == nIndex) |
522 | return xRes; |
523 | } |
524 | return NULL; |
525 | } |
526 | |
527 | Reference< XPossibleHyphens > SAL_CALL Hyphenator::createPossibleHyphens( const OUString& aWord, |
528 | const ::com::sun::star::lang::Locale& aLocale, |
529 | const ::com::sun::star::beans::PropertyValues& aProperties ) |
530 | throw(::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException) |
531 | { |
532 | PropertyHelper_Hyphenation& rHelper = GetPropHelper(); |
533 | rHelper.SetTmpPropVals(aProperties); |
534 | sal_Int16 minTrail = rHelper.GetMinTrailing(); |
535 | sal_Int16 minLead = rHelper.GetMinLeading(); |
536 | sal_Int16 minLen = rHelper.GetMinWordLength(); |
537 | |
538 | //Resolves: fdo#41083 honour MinWordLength in "createPossibleHyphens" as |
539 | //well as "hyphenate" |
540 | if (aWord.getLength() < minLen) |
541 | { |
542 | return PossibleHyphens::CreatePossibleHyphens( aWord, LinguLocaleToLanguage( aLocale ), |
543 | aWord, Sequence< sal_Int16 >() ); |
544 | } |
545 | |
546 | int k = -1; |
547 | for (int j = 0; j < numdict; j++) |
548 | { |
549 | if (aLocale == aDicts[j].aLoc) k = j; |
550 | } |
551 | |
552 | // if we have a hyphenation dictionary matching this locale |
553 | if (k != -1) |
554 | { |
555 | HyphenDict *dict = NULL; |
556 | // if this dictioanry has not been loaded yet do that |
557 | if (!aDicts[k].aPtr) |
558 | { |
559 | OUString DictFN = aDicts[k].aName + ".dic" ; |
560 | OUString dictpath; |
561 | |
562 | osl::FileBase::getSystemPathFromFileURL( DictFN, dictpath ); |
563 | OString sTmp( OU2ENC( dictpath, osl_getThreadTextEncoding() ) ); |
564 | |
565 | #if defined(WNT) |
566 | // workaround for Windows specific problem that the |
567 | // path length in calls to 'fopen' is limted to somewhat |
568 | // about 120+ characters which will usually be exceed when |
569 | // using dictionaries as extensions. |
570 | sTmp = Win_GetShortPathName( dictpath ); |
571 | #endif |
572 | |
573 | if ( ( dict = hnj_hyphen_load ( sTmp.getStr()) ) == NULL ) |
574 | { |
575 | fprintf(stderr, "Couldn't find file %s and %s\n" , sTmp.getStr(), OU2ENC(dictpath, osl_getThreadTextEncoding()) ); |
576 | return NULL; |
577 | } |
578 | aDicts[k].aPtr = dict; |
579 | aDicts[k].eEnc = getTextEncodingFromCharset(dict->cset); |
580 | } |
581 | |
582 | // other wise hyphenate the word with that dictionary |
583 | dict = aDicts[k].aPtr; |
584 | rtl_TextEncoding eEnc = aDicts[k].eEnc; |
585 | CharClass* pCC = aDicts[k].apCC; |
586 | |
587 | // we don't want to work with a default text encoding since following incorrect |
588 | // results may occur only for specific text and thus may be hard to notice. |
589 | // Thus better always make a clean exit here if the text encoding is in question. |
590 | // Hopefully something not working at all will raise proper attention quickly. ;-) |
591 | DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" ); |
592 | if (eEnc == RTL_TEXTENCODING_DONTKNOW) |
593 | return NULL; |
594 | |
595 | // first handle smart quotes both single and double |
596 | OUStringBuffer rBuf(aWord); |
597 | sal_Int32 nc = rBuf.getLength(); |
598 | sal_Unicode ch; |
599 | for (sal_Int32 ix=0; ix < nc; ix++) |
600 | { |
601 | ch = rBuf[ix]; |
602 | if ((ch == 0x201C) || (ch == 0x201D)) |
603 | rBuf[ix] = (sal_Unicode)0x0022; |
604 | if ((ch == 0x2018) || (ch == 0x2019)) |
605 | rBuf[ix] = (sal_Unicode)0x0027; |
606 | } |
607 | OUString nWord(rBuf.makeStringAndClear()); |
608 | |
609 | // now convert word to all lowercase for pattern recognition |
610 | OUString nTerm(makeLowerCase(nWord, pCC)); |
611 | |
612 | // now convert word to needed encoding |
613 | OString encWord(OU2ENC(nTerm,eEnc)); |
614 | |
615 | int wordlen = encWord.getLength(); |
616 | char *lcword = new char[wordlen+1]; |
617 | char *hyphens = new char[wordlen+5]; |
618 | char ** rep = NULL; // replacements of discretionary hyphenation |
619 | int * pos = NULL; // array of [hyphenation point] minus [deletion position] |
620 | int * cut = NULL; // length of deletions in original word |
621 | |
622 | // copy converted word into simple char buffer |
623 | strcpy(lcword,encWord.getStr()); |
624 | |
625 | // first remove any trailing periods |
626 | int n = wordlen-1; |
627 | while((n >=0) && (lcword[n] == '.')) |
628 | n--; |
629 | n++; |
630 | if (n > 0) |
631 | { |
632 | const bool bFailed = 0 != hnj_hyphen_hyphenate3(dict, lcword, n, hyphens, NULL, |
633 | &rep, &pos, &cut, minLead, minTrail, |
634 | Max(dict->clhmin, Max(dict->clhmin, 2) + Max(0, minLead - Max(dict->lhmin, 2))), |
635 | Max(dict->crhmin, Max(dict->crhmin, 2) + Max(0, minTrail - Max(dict->rhmin, 2))) ); |
636 | if (bFailed) |
637 | { |
638 | delete[] hyphens; |
639 | delete[] lcword; |
640 | |
641 | if (rep) |
642 | { |
643 | for(int j = 0; j < n; j++) |
644 | { |
645 | if (rep[j]) free(rep[j]); |
646 | } |
647 | free(rep); |
648 | } |
649 | if (pos) free(pos); |
650 | if (cut) free(cut); |
651 | |
652 | return NULL; |
653 | } |
654 | } |
655 | // now backfill hyphens[] for any removed periods |
656 | for (int c = n; c < wordlen; c++) |
657 | hyphens[c] = '0'; |
658 | hyphens[wordlen] = '\0'; |
659 | |
660 | sal_Int16 nHyphCount = 0; |
661 | sal_Int16 i; |
662 | |
663 | for ( i = 0; i < encWord.getLength(); i++) |
664 | { |
665 | if (hyphens[i]&1) |
666 | nHyphCount++; |
667 | } |
668 | |
669 | Sequence< sal_Int16 > aHyphPos(nHyphCount); |
670 | sal_Int16 *pPos = aHyphPos.getArray(); |
671 | OUStringBuffer hyphenatedWordBuffer; |
672 | nHyphCount = 0; |
673 | |
674 | for (i = 0; i < nWord.getLength(); i++) |
675 | { |
676 | hyphenatedWordBuffer.append(aWord[i]); |
677 | // hyphenation position |
678 | if (hyphens[i]&1) |
679 | { |
680 | pPos[nHyphCount] = i; |
681 | hyphenatedWordBuffer.append('='); |
682 | nHyphCount++; |
683 | } |
684 | } |
685 | |
686 | OUString hyphenatedWord = hyphenatedWordBuffer.makeStringAndClear(); |
687 | |
688 | Reference< XPossibleHyphens > xRes = PossibleHyphens::CreatePossibleHyphens( |
689 | aWord, LinguLocaleToLanguage( aLocale ), hyphenatedWord, aHyphPos); |
690 | |
691 | delete[] hyphens; |
692 | delete[] lcword; |
693 | |
694 | if (rep) |
695 | { |
696 | for(int j = 0; j < n; j++) |
697 | { |
698 | if (rep[j]) free(rep[j]); |
699 | } |
700 | free(rep); |
701 | } |
702 | if (pos) free(pos); |
703 | if (cut) free(cut); |
704 | |
705 | return xRes; |
706 | } |
707 | |
708 | return NULL; |
709 | } |
710 | |
711 | OUString SAL_CALL Hyphenator::makeLowerCase(const OUString& aTerm, CharClass * pCC) |
712 | { |
713 | if (pCC) |
714 | return pCC->lowercase(aTerm); |
715 | return aTerm; |
716 | } |
717 | |
718 | OUString SAL_CALL Hyphenator::makeUpperCase(const OUString& aTerm, CharClass * pCC) |
719 | { |
720 | if (pCC) |
721 | return pCC->uppercase(aTerm); |
722 | return aTerm; |
723 | } |
724 | |
725 | |
726 | OUString SAL_CALL Hyphenator::makeInitCap(const OUString& aTerm, CharClass * pCC) |
727 | { |
728 | sal_Int32 tlen = aTerm.getLength(); |
729 | if ((pCC) && (tlen)) |
730 | { |
731 | OUString bTemp = aTerm.copy(0,1); |
732 | if (tlen > 1) |
733 | return ( pCC->uppercase(bTemp, 0, 1) + pCC->lowercase(aTerm,1,(tlen-1)) ); |
734 | |
735 | return pCC->uppercase(bTemp, 0, 1); |
736 | } |
737 | return aTerm; |
738 | } |
739 | |
740 | |
741 | Reference< XInterface > SAL_CALL Hyphenator_CreateInstance( |
742 | const Reference< XMultiServiceFactory > & /*rSMgr*/ ) |
743 | throw(Exception) |
744 | { |
745 | Reference< XInterface > xService = (cppu::OWeakObject*) new Hyphenator; |
746 | return xService; |
747 | } |
748 | |
749 | |
750 | sal_Bool SAL_CALL Hyphenator::addLinguServiceEventListener( |
751 | const Reference< XLinguServiceEventListener >& rxLstnr ) |
752 | throw(RuntimeException) |
753 | { |
754 | MutexGuard aGuard( GetLinguMutex() ); |
755 | |
756 | sal_Bool bRes = sal_False; |
757 | if (!bDisposing && rxLstnr.is()) |
758 | { |
759 | bRes = GetPropHelper().addLinguServiceEventListener( rxLstnr ); |
760 | } |
761 | return bRes; |
762 | } |
763 | |
764 | |
765 | sal_Bool SAL_CALL Hyphenator::removeLinguServiceEventListener( |
766 | const Reference< XLinguServiceEventListener >& rxLstnr ) |
767 | throw(RuntimeException) |
768 | { |
769 | MutexGuard aGuard( GetLinguMutex() ); |
770 | |
771 | sal_Bool bRes = sal_False; |
772 | if (!bDisposing && rxLstnr.is()) |
773 | { |
774 | bRes = GetPropHelper().removeLinguServiceEventListener( rxLstnr ); |
775 | } |
776 | return bRes; |
777 | } |
778 | |
779 | |
780 | OUString SAL_CALL Hyphenator::getServiceDisplayName( const Locale& /*rLocale*/ ) |
781 | throw(RuntimeException) |
782 | { |
783 | MutexGuard aGuard( GetLinguMutex() ); |
784 | return OUString( "Libhyphen Hyphenator" ); |
785 | } |
786 | |
787 | |
788 | void SAL_CALL Hyphenator::initialize( const Sequence< Any >& rArguments ) |
789 | throw(Exception, RuntimeException) |
790 | { |
791 | MutexGuard aGuard( GetLinguMutex() ); |
792 | |
793 | if (!pPropHelper) |
794 | { |
795 | sal_Int32 nLen = rArguments.getLength(); |
796 | if (2 == nLen) |
797 | { |
798 | Reference< XLinguProperties > xPropSet; |
799 | rArguments.getConstArray()[0] >>= xPropSet; |
800 | //rArguments.getConstArray()[1] >>= xDicList; |
801 | |
802 | //! Pointer allows for access of the non-UNO functions. |
803 | //! And the reference to the UNO-functions while increasing |
804 | //! the ref-count and will implicitly free the memory |
805 | //! when the object is not longer used. |
806 | pPropHelper = new PropertyHelper_Hyphenation( (XHyphenator *) this, xPropSet ); |
807 | pPropHelper->AddAsPropListener(); //! after a reference is established |
808 | } |
809 | else { |
810 | OSL_FAIL( "wrong number of arguments in sequence" ); |
811 | } |
812 | } |
813 | } |
814 | |
815 | |
816 | void SAL_CALL Hyphenator::dispose() |
817 | throw(RuntimeException) |
818 | { |
819 | MutexGuard aGuard( GetLinguMutex() ); |
820 | |
821 | if (!bDisposing) |
822 | { |
823 | bDisposing = true; |
824 | EventObject aEvtObj( (XHyphenator *) this ); |
825 | aEvtListeners.disposeAndClear( aEvtObj ); |
826 | if (pPropHelper) |
827 | { |
828 | pPropHelper->RemoveAsPropListener(); |
829 | delete pPropHelper; |
830 | pPropHelper = NULL; |
831 | } |
832 | } |
833 | } |
834 | |
835 | |
836 | void SAL_CALL Hyphenator::addEventListener( const Reference< XEventListener >& rxListener ) |
837 | throw(RuntimeException) |
838 | { |
839 | MutexGuard aGuard( GetLinguMutex() ); |
840 | |
841 | if (!bDisposing && rxListener.is()) |
842 | aEvtListeners.addInterface( rxListener ); |
843 | } |
844 | |
845 | |
846 | void SAL_CALL Hyphenator::removeEventListener( const Reference< XEventListener >& rxListener ) |
847 | throw(RuntimeException) |
848 | { |
849 | MutexGuard aGuard( GetLinguMutex() ); |
850 | |
851 | if (!bDisposing && rxListener.is()) |
852 | aEvtListeners.removeInterface( rxListener ); |
853 | } |
854 | |
855 | // Service specific part |
856 | OUString SAL_CALL Hyphenator::getImplementationName() |
857 | throw(RuntimeException) |
858 | { |
859 | MutexGuard aGuard( GetLinguMutex() ); |
860 | |
861 | return getImplementationName_Static(); |
862 | } |
863 | |
864 | sal_Bool SAL_CALL Hyphenator::supportsService( const OUString& ServiceName ) |
865 | throw(RuntimeException) |
866 | { |
867 | return cppu::supportsService(this, ServiceName); |
868 | } |
869 | |
870 | Sequence< OUString > SAL_CALL Hyphenator::getSupportedServiceNames() |
871 | throw(RuntimeException) |
872 | { |
873 | MutexGuard aGuard( GetLinguMutex() ); |
874 | |
875 | return getSupportedServiceNames_Static(); |
876 | } |
877 | |
878 | Sequence< OUString > Hyphenator::getSupportedServiceNames_Static() |
879 | throw() |
880 | { |
881 | MutexGuard aGuard( GetLinguMutex() ); |
882 | |
883 | Sequence< OUString > aSNS( 1 ); // auch mehr als 1 Service moeglich |
884 | aSNS.getArray()[0] = SN_HYPHENATOR; |
885 | return aSNS; |
886 | } |
887 | |
888 | void * SAL_CALL Hyphenator_getFactory( const sal_Char * pImplName, |
889 | XMultiServiceFactory * pServiceManager, void * ) |
890 | { |
891 | void * pRet = 0; |
892 | if ( Hyphenator::getImplementationName_Static().equalsAscii( pImplName ) ) |
893 | { |
894 | Reference< XSingleServiceFactory > xFactory = |
895 | cppu::createOneInstanceFactory( |
896 | pServiceManager, |
897 | Hyphenator::getImplementationName_Static(), |
898 | Hyphenator_CreateInstance, |
899 | Hyphenator::getSupportedServiceNames_Static()); |
900 | // acquire, because we return an interface pointer instead of a reference |
901 | xFactory->acquire(); |
902 | pRet = xFactory.get(); |
903 | } |
904 | return pRet; |
905 | } |
906 | |
907 | |
908 | /////////////////////////////////////////////////////////////////////////// |
909 | |
910 | /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |
911 | |