1 | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
2 | /* |
3 | * This file is part of the LibreOffice project. |
4 | * |
5 | * This Source Code Form is subject to the terms of the Mozilla Public |
6 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
7 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8 | */ |
9 | |
10 | #ifndef INCLUDED_I18NLANGTAG_LANGUAGETAG_HXX |
11 | #define INCLUDED_I18NLANGTAG_LANGUAGETAG_HXX |
12 | |
13 | #include <sal/config.h> |
14 | #include <rtl/ustring.hxx> |
15 | #include <com/sun/star/lang/Locale.hpp> |
16 | #include <i18nlangtag/i18nlangtagdllapi.h> |
17 | #include <i18nlangtag/lang.h> |
18 | |
19 | #include <boost/shared_ptr.hpp> |
20 | #include <vector> |
21 | |
22 | typedef struct _rtl_Locale rtl_Locale; // forward declaration; same typedef as in rtl/locale.h |
23 | |
24 | |
25 | /** The ISO 639-2 code reserved for local use that indicates that a |
26 | com::sun::star::lang::Locale contains a BCP 47 string in its Variant field. |
27 | The Locale's Language field then will contain this language code. |
28 | |
29 | @see LanguageTag::getLocale() |
30 | |
31 | Avoid use, only needed internally or if conversion from Locale to |
32 | LanguageTag is not wanted, i.e. during ODF import. To check whether a |
33 | LanguageTag contains a plain language/country combination or a more |
34 | detailed BCP 47 language tag use LanguageTag::isIsoLocale() instead. |
35 | */ |
36 | #define I18NLANGTAG_QLT "qlt" |
37 | |
38 | |
39 | class LanguageTagImpl; /* forward declaration; declared a friend of LanguageTag and held through LanguageTag::ImplPtr */ |
40 | |
41 | |
42 | /** Wrapper for liblangtag BCP 47 language tags, MS-LangIDs, locales and |
43 | conversions in between. |
44 | |
45 | Note that member variables are mutable and may change their values even in |
46 | const methods. Getter methods return either the original value or matching |
47 | converted values. |
48 | |
49 | For standalone conversions if no LanguageTag instance is at hand, static |
50 | convertTo...() methods exist. |
51 | */ |
52 | class I18NLANGTAG_DLLPUBLIC LanguageTag |
53 | { |
54 | friend class LanguageTagImpl; |
55 | |
56 | public: |
57 | |
58 | /** Init LanguageTag with existing BCP 47 language tag string. |
59 | |
60 | @param bCanonicalize |
61 | If TRUE, canonicalize tag and reparse, the resulting tag string may |
62 | be different. |
63 | If FALSE, the tag is simply stored and can be retrieved with |
64 | getBcp47(). |
65 | |
66 | Note that conversions to ISO codes, locales or LanguageType or |
67 | obtaining language or script will canonicalize the tag string anyway, |
68 | so specifying bCanonicalize=false is not a guarantee that the tag will |
69 | stay identical to what was passed. |
70 | */ |
71 | explicit LanguageTag( const OUString & rBcp47LanguageTag, bool bCanonicalize = false ); |
72 | |
73 | /** Init LanguageTag with Locale. */ |
74 | explicit LanguageTag( const com::sun::star::lang::Locale & rLocale ); |
75 | |
76 | /** Init LanguageTag with LanguageType MS-LangID. */ |
77 | explicit LanguageTag( LanguageType nLanguage ); |
78 | |
79 | /** Default ctor, init LanguageTag with LanguageType LANGUAGE_DONTKNOW. |
80 | |
81 | Exists to be able to use LanguageTag in maps etc.; avoid otherwise. |
82 | */ |
83 | explicit LanguageTag(); |
84 | |
85 | /** Init LanguageTag with either BCP 47 language tag (precedence if not |
86 | empty), or a combination of language, script and country. |
87 | |
88 | This is a convenience ctor to be used in ODF import where these are |
89 | distinct attributes. |
90 | */ |
91 | explicit LanguageTag( const OUString& rBcp47, const OUString& rLanguage, |
92 | const OUString& rScript, const OUString& rCountry ); |
93 | |
94 | /** Init LanguageTag with rtl_Locale. |
95 | |
96 | This is a convenience ctor. |
97 | */ |
98 | explicit LanguageTag( const rtl_Locale & rLocale ); |
99 | |
100 | LanguageTag( const LanguageTag & rLanguageTag ); /* Copy ctor. */ |
101 | ~LanguageTag(); /* Dtor. */ |
102 | LanguageTag& operator=( const LanguageTag & rLanguageTag ); /* Copy assignment. */ |
103 | |
104 | /** Obtain BCP 47 language tag. |
105 | |
106 | @param bResolveSystem |
107 | If TRUE, resolve an empty language tag denoting the system |
108 | locale to the real locale used. |
109 | If FALSE, return an empty OUString for such a tag. |
110 | */ |
111 | const OUString & getBcp47( bool bResolveSystem = true ) const; |
112 | |
113 | /** Obtain language tag as Locale. |
114 | |
115 | As a convention, language tags that cannot be expressed as "pure" |
116 | com::sun::star::lang::Locale content using Language and Country fields |
117 | store "qlt" (ISO 639 reserved for local use) in the Language field and |
118 | the entire BCP 47 language tag in the Variant field. The Country field |
119 | contains the corresponding ISO 3166 country code _if_ there is one, or |
120 | otherwise is empty. |
121 | |
122 | @param bResolveSystem |
123 | If TRUE, resolve an empty language tag denoting the system |
124 | locale to the real locale used. |
125 | If FALSE, return an empty Locale for such a tag. |
126 | */ |
127 | const com::sun::star::lang::Locale & getLocale( bool bResolveSystem = true ) const; |
128 | |
129 | /** Obtain mapping to MS-LangID. |
130 | |
131 | @param bResolveSystem |
132 | If TRUE, resolve an empty language tag denoting the system |
133 | locale to the real locale used. |
134 | If FALSE, return LANGUAGE_SYSTEM for such a tag. |
135 | */ |
136 | LanguageType getLanguageType( bool bResolveSystem = true ) const; |
137 | |
138 | /** Obtain ISO strings for language, script and country. |
139 | |
140 | This is a convenience method for ODF export places only. Avoid use in |
141 | other code. |
142 | |
143 | ATTENTION! May return empty strings if the language tag is not |
144 | expressible in valid ISO codes! |
145 | |
146 | @see isIsoODF() |
147 | |
148 | Always resolves an empty tag to the system locale. |
149 | */ |
150 | void getIsoLanguageScriptCountry( OUString& rLanguage, |
151 | OUString& rScript, OUString& rCountry ) const; |
152 | |
153 | /** Get ISO 639 language code, or BCP 47 language. |
154 | |
155 | Always resolves an empty tag to the system locale. |
156 | */ |
157 | OUString getLanguage() const; |
158 | |
159 | /** Get ISO 15924 script code, if not the default script according to |
160 | BCP 47. For default script an empty string is returned. |
161 | |
162 | @see hasScript() |
163 | |
164 | Always resolves an empty tag to the system locale. |
165 | */ |
166 | OUString getScript() const; |
167 | |
168 | /** Get combined language and script code, separated by '-' if |
169 | non-default script, if default script only language. |
170 | |
171 | @see hasScript() |
172 | |
173 | Always resolves an empty tag to the system locale. |
174 | */ |
175 | OUString getLanguageAndScript() const; |
176 | |
177 | /** Get ISO 3166 country alpha code. Empty if the BCP 47 tags denote a |
178 | region not expressible as 2 character country code. |
179 | |
180 | Always resolves an empty tag to the system locale. |
181 | */ |
182 | OUString getCountry() const; |
183 | |
184 | /** Get BCP 47 region tag, which may be an ISO 3166 country alpha code or |
185 | any other BCP 47 region tag. |
186 | |
187 | Always resolves an empty tag to the system locale. |
188 | */ |
189 | OUString getRegion() const; |
190 | |
191 | /** Get BCP 47 variant subtags, of the IANA Language Subtag Registry. |
192 | |
193 | If there are multiple variant subtags they are separated by '-'. |
194 | |
195 | This is NOT related to Locale.Variant! |
196 | |
197 | Always resolves an empty tag to the system locale. |
198 | */ |
199 | OUString getVariants() const; |
200 | |
201 | /** Get a GLIBC locale string. |
202 | |
203 | Always resolves an empty tag to the system locale. |
204 | |
205 | @param rEncoding |
206 | An encoding to be appended to language_country, for example |
207 | ".UTF-8" including the dot. |
208 | |
209 | @return The resulting GLIBC locale string if it could be constructed, |
210 | if not an empty string is returned. |
211 | */ |
212 | OUString getGlibcLocaleString( const OUString & rEncoding ) const; |
213 | |
214 | /** If language tag has a non-default script specified. |
215 | */ |
216 | bool hasScript() const; |
217 | |
218 | /** If language tag is a locale that can be expressed using only ISO 639 |
219 | language codes and ISO 3166 country codes, thus is convertible to a |
220 | conforming Locale struct without using extension mechanisms. |
221 | |
222 | Note that an empty language tag or empty Locale::Language field or |
223 | LanguageType LANGUAGE_SYSTEM could be treated as a valid ISO locale in |
224 | some context, but here is not. If you want that ask for |
225 | aTag.isSystemLocale() || aTag.isIsoLocale() |
226 | |
227 | Always resolves an empty tag to the system locale. |
228 | */ |
229 | bool isIsoLocale() const; |
230 | |
231 | /** If language tag is a locale that can be expressed using only ISO 639 |
232 | language codes and ISO 15924 script codes and ISO 3166 country codes, |
233 | thus can be stored in an ODF document using only fo:language, fo:script |
234 | and fo:country attributes. If this is FALSE, the locale must be stored |
235 | as a <*:rfc-language-tag> element. |
236 | |
237 | Always resolves an empty tag to the system locale. |
238 | */ |
239 | bool isIsoODF() const; |
240 | |
241 | /** If this is a valid BCP 47 language tag. |
242 | |
243 | Always resolves an empty tag to the system locale. |
244 | */ |
245 | bool isValidBcp47() const; |
246 | |
247 | /** If this tag was constructed as an empty tag denoting the system locale. |
248 | */ |
249 | bool isSystemLocale() const; |
250 | |
251 | |
252 | /** Reset with existing BCP 47 language tag string. See ctor. */ |
253 | LanguageTag & reset( const OUString & rBcp47LanguageTag, bool bCanonicalize = false ); |
254 | |
255 | /** Reset with Locale. */ |
256 | LanguageTag & reset( const com::sun::star::lang::Locale & rLocale ); |
257 | |
258 | /** Reset with LanguageType MS-LangID. */ |
259 | LanguageTag & reset( LanguageType nLanguage ); |
260 | |
261 | /** Reset with rtl_Locale. */ |
262 | LanguageTag & reset( const rtl_Locale & rLocale ); |
263 | |
264 | |
265 | /** Fall back to a known locale. |
266 | |
267 | If the current tag does not represent a known (by us) locale, fall back |
268 | to the most likely known locale. |
269 | If the current tag is known, no change occurs. |
270 | */ |
271 | LanguageTag & makeFallback(); |
272 | |
273 | /** Return a vector of fall-back strings. |
274 | |
275 | In order: |
276 | full BCP 47 tag, same as getBcp47() |
277 | lll-Ssss-CC |
278 | lll-Ssss |
279 | lll-CC |
280 | lll |
281 | |
282 | If the tag includes variants the order is: |
283 | full BCP 47 tag, same as getBcp47() |
284 | lll-Ssss-CC-vvvvvvvv |
285 | lll-Ssss-vvvvvvvv |
286 | lll-Ssss-CC |
287 | lll-Ssss |
288 | lll-CC-vvvvvvvv |
289 | lll-vvvvvvvv |
290 | lll-CC |
291 | lll |
292 | |
293 | Only strings that differ from a higher order are included, for example |
294 | if there is no script the elements will be bcp47, lll-CC, lll; if the |
295 | bcp47 string is identical to lll-CC then only lll-CC, lll. |
296 | |
297 | Note that lll is only ISO 639-1/2 alpha code and CC is only ISO 3166 |
298 | alpha code. If the region cannot be expressed as ISO 3166 then no -CC |
299 | tags are included. |
300 | |
301 | @param bIncludeFullBcp47 |
302 | If TRUE, the full BCP 47 tag is included as first element. |
303 | If FALSE, the full tag is not included; used if the caller |
304 | obtains the fallbacks only if the full tag did not lead to a |
305 | match, so subsequent tries need not include it again. |
306 | */ |
307 | ::std::vector< OUString > getFallbackStrings( bool bIncludeFullBcp47 ) const; |
308 | |
309 | |
310 | /** @short Search for an equal or at least for a similar locale in a list |
311 | of possible ones. |
312 | |
313 | @descr First search for a locale that is equal to the reference |
314 | locale. (means: same BCP47 string) |
315 | |
316 | If the reference locale could not be located, check for |
317 | "similar" locales, in the same order as obtained by |
318 | getFallbackStrings(). |
319 | |
320 | If no similar locale could be located, we search for a locale |
321 | "en-US" inside the given locale list. |
322 | |
323 | If "en-US" could not be located, we search for a locale "en" |
324 | inside the given list. |
325 | |
326 | If no "same" nor any "similar" locale could be found, we try |
327 | "x-default" and "x-no-translate" explicitly. Sometimes |
328 | variables don't use real localization. For example, in case the |
329 | localized value is a fixed product name. |
330 | |
331 | If no locale matched until then, we use any other locale that |
332 | exists inside the set of given ones, namely the first |
333 | encountered! |
334 | |
335 | @param rList |
336 | the vector of possible locales as BCP47 strings. |
337 | |
338 | @param rReference |
339 | the reference locale, BCP47 string. |
340 | |
341 | @return An iterator that points to the found element inside the given |
342 | locale list. If no matching locale could be found it points to |
343 | the beginning of the list. |
344 | */ |
345 | static ::std::vector< OUString >::const_iterator getFallback( const ::std::vector< OUString > & rList, |
346 | const OUString & rReference ); |
347 | |
348 | |
349 | /** @short Search for an equal or for a similar locale in a list |
350 | of possible ones where at least the language matches. |
351 | |
352 | @descr First search for a locale that is equal to the reference |
353 | locale. |
354 | |
355 | If the reference locale could not be located, check for |
356 | "similar" locales, in the same order as obtained by |
357 | getFallbackStrings(). |
358 | |
359 | If no locale matches, rList.end() is returned. |
360 | |
361 | @param rList |
362 | the vector of possible locales. |
363 | |
364 | @param rReference |
365 | the reference locale. |
366 | |
367 | @return An iterator that points to the found element inside the given |
368 | locale list. If no matching locale could be found it points to |
369 | the end of the list. |
370 | */ |
371 | static ::std::vector< com::sun::star::lang::Locale >::const_iterator getMatchingFallback( |
372 | const ::std::vector< com::sun::star::lang::Locale > & rList, |
373 | const com::sun::star::lang::Locale & rReference ); |
374 | |
375 | |
376 | /** Test equality of two LanguageTag, possibly resolving system locale. |
377 | |
378 | @param bResolveSystem |
379 | If TRUE, resolve empty language tags denoting the system |
380 | locale to the real locale used before comparing. |
381 | If FALSE, the behavior is identical to operator==(), system |
382 | locales are not resolved first. |
383 | */ |
384 | bool equals( const LanguageTag & rLanguageTag, bool bResolveSystem = false ) const; |
385 | |
386 | /** Test equality of two LanguageTag. |
387 | |
388 | Does NOT resolve system, i.e. if the system locale is en-US |
389 | LanguageTag("")==LanguageTag("en-US") returns false! Use |
390 | equals(...,true) instead if system locales shall be resolved. |
391 | */ |
392 | bool operator==( const LanguageTag & rLanguageTag ) const; |
393 | |
394 | /** Test inequality of two LanguageTag. |
395 | |
396 | Does NOT resolve system, i.e. if the system locale is en-US |
397 | LanguageTag("")!=LanguageTag("en-US") returns true! Use |
398 | !equals(...,true) instead if system locales shall be resolved. |
399 | */ |
400 | bool operator!=( const LanguageTag & rLanguageTag ) const; |
401 | |
402 | /** Test this LanguageTag less than that LanguageTag. |
403 | |
404 | For sorted containers. Does NOT resolve system. |
405 | */ |
406 | bool operator<( const LanguageTag & rLanguageTag ) const; |
407 | |
408 | /** Convert MS-LangID to Locale. |
409 | |
410 | @param bResolveSystem |
411 | If TRUE, resolve an empty language tag denoting the system |
412 | locale to the real locale used. |
413 | If FALSE, return an empty Locale for such a tag. |
414 | */ |
415 | static com::sun::star::lang::Locale convertToLocale( LanguageType nLangID, bool bResolveSystem = true ); |
416 | |
417 | /** Convert Locale to MS-LangID. |
418 | |
419 | @param bResolveSystem |
420 | If TRUE, resolve an empty language tag denoting the system |
421 | locale to the real locale used. |
422 | If FALSE, return LANGUAGE_SYSTEM for such a tag. |
423 | */ |
424 | static LanguageType convertToLanguageType( const com::sun::star::lang::Locale& rLocale, bool bResolveSystem = true ); |
425 | |
426 | /** Convert MS-LangID to BCP 47 string. |
427 | |
428 | @param bResolveSystem |
429 | If TRUE, resolve an empty language tag denoting the system |
430 | locale to the real locale used. |
431 | If FALSE, return an empty OUString for such a tag. |
432 | */ |
433 | static OUString convertToBcp47( LanguageType nLangID, bool bResolveSystem = true ); |
434 | |
435 | /** Convert Locale to BCP 47 string. |
436 | |
437 | @param bResolveSystem |
438 | If TRUE, resolve an empty language tag denoting the system |
439 | locale to the real locale used. |
440 | If FALSE, return an empty OUString for such a tag. |
441 | */ |
442 | static OUString convertToBcp47( const com::sun::star::lang::Locale& rLocale, bool bResolveSystem = true ); |
443 | |
444 | /** Convert BCP 47 string to Locale, convenience method. |
445 | |
446 | NOTE: exists only for consistency with the other convertTo...() |
447 | methods, internally uses a temporary LanguageTag instance for |
448 | conversion so does not save anything compared to |
449 | LanguageTag(rBcp47).getLocale(bResolveSystem). |
450 | |
451 | @param bResolveSystem |
452 | If TRUE, resolve an empty language tag denoting the system |
453 | locale to the real locale used. |
454 | If FALSE, return an empty Locale for such a tag. |
455 | */ |
456 | static com::sun::star::lang::Locale convertToLocale( const OUString& rBcp47, bool bResolveSystem = true ); |
457 | |
458 | /** Convert BCP 47 string to MS-LangID, convenience method. |
459 | |
460 | NOTE: exists only for consistency with the other convertTo...() |
461 | methods, internally uses a temporary LanguageTag instance for |
462 | conversion so does not save anything compared to |
463 | LanguageTag(rBcp47).getLanguageType(bResolveSystem). |
464 | |
465 | @param bResolveSystem |
466 | If TRUE, resolve an empty language tag denoting the system |
467 | locale to the real locale used. |
468 | If FALSE, return LANGUAGE_SYSTEM for such a tag. |
469 | */ |
470 | static LanguageType convertToLanguageType( const OUString& rBcp47, bool bResolveSystem = true ); |
471 | |
472 | /** Convert BCP 47 string to MS-LangID with fallback, convenience method. |
473 | |
474 | NOTE: exists only for consistency with the other convertTo...() |
475 | methods, internally uses a temporary LanguageTag instance for |
476 | conversion so does not save anything compared to |
477 | LanguageTag(rBcp47).makeFallback().getLanguageType(bResolveSystem). |
478 | |
479 | @see makeFallback() |
480 | |
481 | Always resolves an empty tag to the system locale. |
482 | */ |
483 | static LanguageType convertToLanguageTypeWithFallback( const OUString& rBcp47 ); |
484 | |
485 | /** Convert BCP 47 string to Locale with fallback, convenience method. |
486 | |
487 | NOTE: exists only for consistency with the other convertTo...() |
488 | methods, internally uses a temporary LanguageTag instance for |
489 | conversion so does not save anything compared to |
490 | LanguageTag(rBcp47).makeFallback().getLocale(bResolveSystem). |
491 | |
492 | @see makeFallback() |
493 | |
494 | Always resolves an empty tag to the system locale. |
495 | */ |
496 | static com::sun::star::lang::Locale convertToLocaleWithFallback( const OUString& rBcp47 ); |
497 | |
498 | /** If nLang is a generated on-the-fly LangID */ |
499 | static bool isOnTheFlyID( LanguageType nLang ); |
500 | |
501 | /** @ATTENTION: _ONLY_ to be called by the application's configuration! */ |
502 | static void setConfiguredSystemLanguage( LanguageType nLang ); |
503 | |
504 | typedef ::boost::shared_ptr< LanguageTagImpl > ImplPtr; /* shared-ownership handle to a LanguageTagImpl */ |
505 | |
506 | private: |
507 | |
508 | mutable com::sun::star::lang::Locale maLocale; |
509 | mutable OUString maBcp47; |
510 | mutable LanguageType mnLangID; |
511 | mutable ImplPtr mpImpl; |
512 | bool mbSystemLocale : 1; |
513 | mutable bool mbInitializedBcp47 : 1; /* NOTE(review): the mbInitialized* flags presumably record which of maBcp47/maLocale/mnLangID hold a valid value -- confirm in the implementation */ |
514 | mutable bool mbInitializedLocale : 1; |
515 | mutable bool mbInitializedLangID : 1; |
516 | bool mbIsFallback : 1; |
517 | |
518 | ImplPtr getImpl() const; |
519 | ImplPtr registerImpl() const; |
520 | void syncFromImpl(); |
521 | void syncVarsFromRawImpl() const; |
522 | void syncVarsFromImpl() const; |
523 | |
524 | void convertLocaleToBcp47(); |
525 | void convertLocaleToLang(); |
526 | void convertBcp47ToLocale(); |
527 | void convertBcp47ToLang(); |
528 | void convertLangToLocale(); |
529 | void convertLangToBcp47(); |
530 | |
531 | void convertFromRtlLocale(); |
532 | |
533 | /** Canonicalize if not yet done and synchronize initialized conversions. |
534 | |
535 | @return whether BCP 47 language tag string was changed. |
536 | */ |
537 | bool synCanonicalize(); |
538 | |
539 | void resetVars(); |
540 | |
541 | static bool isIsoLanguage( const OUString& rLanguage ); |
542 | static bool isIsoScript( const OUString& rScript ); |
543 | static bool isIsoCountry( const OUString& rRegion ); |
544 | |
545 | }; |
546 | |
547 | #endif // INCLUDED_I18NLANGTAG_LANGUAGETAG_HXX |
548 | |
549 | /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |
550 | |