/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */

#ifndef INCLUDED_I18NLANGTAG_LANGUAGETAG_HXX
#define INCLUDED_I18NLANGTAG_LANGUAGETAG_HXX

#include <sal/config.h>
#include <rtl/ustring.hxx>
#include <com/sun/star/lang/Locale.hpp>
#include <i18nlangtag/i18nlangtagdllapi.h>
#include <i18nlangtag/lang.h>

#include <boost/shared_ptr.hpp>
#include <vector>

// Re-declared here (instead of including rtl/locale.h) so that rtl_Locale can
// be named in the LanguageTag interface below; must stay identical to the
// typedef in rtl/locale.h.
typedef struct _rtl_Locale rtl_Locale;  // as in rtl/locale.h


/** The ISO 639-2 code reserved for local use that is used to indicate that a
    com::sun::star::lang::Locale contains a BCP 47 string in its Variant field.
    The Locale's Language field then will contain this language code.

    @see LanguageTag::getLocale()

    Avoid use, only needed internally or if conversion from Locale to
    LanguageTag is not wanted, i.e. during ODF import. To check whether a
    LanguageTag contains a plain language/country combination or a more
    detailed BCP 47 language tag use LanguageTag::isIsoLocale() instead.
 */
#define I18NLANGTAG_QLT "qlt"


// Forward declaration only; LanguageTag befriends this class and holds it
// through LanguageTag::ImplPtr.
class LanguageTagImpl;


42/** Wrapper for liblangtag BCP 47 language tags, MS-LangIDs, locales and
43 conversions in between.
44
45 Note that member variables are mutable and may change their values even in
46 const methods. Getter methods return either the original value or matching
47 converted values.
48
49 For standalone conversions if no LanguageTag instance is at hand, static
50 convertTo...() methods exist.
51 */
52class I18NLANGTAG_DLLPUBLIC LanguageTag
53{
54 friend class LanguageTagImpl;
55
56public:
57
58 /** Init LanguageTag with existing BCP 47 language tag string.
59
60 @param bCanonicalize
61 If TRUE, canonicalize tag and reparse, the resulting tag string may
62 be different.
63 IF FALSE, the tag is simply stored and can be retrieved with
64 getBcp47().
65
66 Note that conversions to ISO codes, locales or LanguageType or
67 obtaining language or script will canonicalize the tag string anyway,
68 so specifying bCanonicalize=false is not a guarantee that the tag will
69 stay identical to what was passed.
70 */
71 explicit LanguageTag( const OUString & rBcp47LanguageTag, bool bCanonicalize = false );
72
73 /** Init LanguageTag with Locale. */
74 explicit LanguageTag( const com::sun::star::lang::Locale & rLocale );
75
76 /** Init LanguageTag with LanguageType MS-LangID. */
77 explicit LanguageTag( LanguageType nLanguage );
78
79 /** Default ctor, init LanguageTag with LanguageType LANGUAGE_DONTKNOW.
80
81 To be able to use LanguageTag in maps etc., avoid otherwise.
82 */
83 explicit LanguageTag();
84
85 /** Init LanguageTag with either BCP 47 language tag (precedence if not
86 empty), or a combination of language, script and country.
87
88 This is a convenience ctor to be used in ODF import where these are
89 distinct attributes.
90 */
91 explicit LanguageTag( const OUString& rBcp47, const OUString& rLanguage,
92 const OUString& rScript, const OUString& rCountry );
93
94 /** Init LanguageTag with rtl_Locale.
95
96 This is a convenience ctor.
97 */
98 explicit LanguageTag( const rtl_Locale & rLocale );
99
100 LanguageTag( const LanguageTag & rLanguageTag );
101 ~LanguageTag();
102 LanguageTag& operator=( const LanguageTag & rLanguageTag );
103
104 /** Obtain BCP 47 language tag.
105
106 @param bResolveSystem
107 If TRUE, resolve an empty language tag denoting the system
108 locale to the real locale used.
109 If FALSE, return an empty OUString for such a tag.
110 */
111 const OUString & getBcp47( bool bResolveSystem = true ) const;
112
113 /** Obtain language tag as Locale.
114
115 As a convention, language tags that can not be expressed as "pure"
116 com::sun::star::lang::Locale content using Language and Country fields
117 store "qlt" (ISO 639 reserved for local use) in the Language field and
118 the entire BCP 47 language tag in the Variant field. The Country field
119 contains the corresponding ISO 3166 country code _if_ there is one, or
120 otherwise is empty.
121
122 @param bResolveSystem
123 If TRUE, resolve an empty language tag denoting the system
124 locale to the real locale used.
125 If FALSE, return an empty Locale for such a tag.
126 */
127 const com::sun::star::lang::Locale & getLocale( bool bResolveSystem = true ) const;
128
129 /** Obtain mapping to MS-LangID.
130
131 @param bResolveSystem
132 If TRUE, resolve an empty language tag denoting the system
133 locale to the real locale used.
134 If FALSE, return LANGUAGE_SYSTEM for such a tag.
135 */
136 LanguageType getLanguageType( bool bResolveSystem = true ) const;
137
138 /** Obtain ISO strings for language, script and country.
139
140 This is a convenience method for ODF export places only. Avoid use in
141 other code.
142
143 ATTENTION! May return empty strings if the language tag is not
144 expressable in valid ISO codes!
145
146 @see isIsoODF()
147
148 Always resolves an empty tag to the system locale.
149 */
150 void getIsoLanguageScriptCountry( OUString& rLanguage,
151 OUString& rScript, OUString& rCountry ) const;
152
153 /** Get ISO 639 language code, or BCP 47 language.
154
155 Always resolves an empty tag to the system locale.
156 */
157 OUString getLanguage() const;
158
159 /** Get ISO 15924 script code, if not the default script according to
160 BCP 47. For default script an empty string is returned.
161
162 @see hasScript()
163
164 Always resolves an empty tag to the system locale.
165 */
166 OUString getScript() const;
167
168 /** Get combined language and script code, separated by '-' if
169 non-default script, if default script only language.
170
171 @see hasScript()
172
173 Always resolves an empty tag to the system locale.
174 */
175 OUString getLanguageAndScript() const;
176
177 /** Get ISO 3166 country alpha code. Empty if the BCP 47 tags denote a
178 region not expressable as 2 character country code.
179
180 Always resolves an empty tag to the system locale.
181 */
182 OUString getCountry() const;
183
184 /** Get BCP 47 region tag, which may be an ISO 3166 country alpha code or
185 any other BCP 47 region tag.
186
187 Always resolves an empty tag to the system locale.
188 */
189 OUString getRegion() const;
190
191 /** Get BCP 47 variant subtags, of the IANA Language Subtag Registry.
192
193 If there are multiple variant subtags they are separated by '-'.
194
195 This is NOT related to Locale.Variant!
196
197 Always resolves an empty tag to the system locale.
198 */
199 OUString getVariants() const;
200
201 /** Get a GLIBC locale string.
202
203 Always resolves an empty tag to the system locale.
204
205 @param rEncoding
206 An encoding to be appended to language_country, for example
207 ".UTF-8" including the dot.
208
209 @return The resulting GLIBC locale string if it could be constructed,
210 if not an empty string is returned.
211 */
212 OUString getGlibcLocaleString( const OUString & rEncoding ) const;
213
214 /** If language tag has a non-default script specified.
215 */
216 bool hasScript() const;
217
218 /** If language tag is a locale that can be expressed using only ISO 639
219 language codes and ISO 3166 country codes, thus is convertible to a
220 conforming Locale struct without using extension mechanisms.
221
222 Note that an empty language tag or empty Locale::Language field or
223 LanguageType LANGUAGE_SYSTEM could be treated as a valid ISO locale in
224 some context, but here is not. If you want that ask for
225 aTag.isSystemLocale() || aTag.isIsoLocale()
226
227 Always resolves an empty tag to the system locale.
228 */
229 bool isIsoLocale() const;
230
231 /** If language tag is a locale that can be expressed using only ISO 639
232 language codes and ISO 15924 script codes and ISO 3166 country codes,
233 thus can be stored in an ODF document using only fo:language, fo:script
234 and fo:country attributes. If this is FALSE, the locale must be stored
235 as a <*:rfc-language-tag> element.
236
237 Always resolves an empty tag to the system locale.
238 */
239 bool isIsoODF() const;
240
241 /** If this is a valid BCP 47 language tag.
242
243 Always resolves an empty tag to the system locale.
244 */
245 bool isValidBcp47() const;
246
247 /** If this tag was contructed as an empty tag denoting the system locale.
248 */
249 bool isSystemLocale() const;
250
251
252 /** Reset with existing BCP 47 language tag string. See ctor. */
253 LanguageTag & reset( const OUString & rBcp47LanguageTag, bool bCanonicalize = false );
254
255 /** Reset with Locale. */
256 LanguageTag & reset( const com::sun::star::lang::Locale & rLocale );
257
258 /** Reset with LanguageType MS-LangID. */
259 LanguageTag & reset( LanguageType nLanguage );
260
261 /** Reset with rtl_Locale. */
262 LanguageTag & reset( const rtl_Locale & rLocale );
263
264
265 /** Fall back to a known locale.
266
267 If the current tag does not represent a known (by us) locale, fall back
268 to the most likely locale possible known.
269 If the current tag is known, no change occurs.
270 */
271 LanguageTag & makeFallback();
272
273 /** Return a vector of fall-back strings.
274
275 In order:
276 full BCP 47 tag, same as getBcp47()
277 lll-Ssss-CC
278 lll-Ssss
279 lll-CC
280 lll
281
282 If the tag includes variants the order is:
283 full BCP 47 tag, same as getBcp47()
284 lll-Ssss-CC-vvvvvvvv
285 lll-Ssss-vvvvvvvv
286 lll-Ssss-CC
287 lll-Ssss
288 lll-CC-vvvvvvvv
289 lll-vvvvvvvv
290 lll-CC
291 lll
292
293 Only strings that differ from a higher order are included, for example
294 if there is no script the elements will be bcp47, lll-CC, lll; if the
295 bcp47 string is identical to lll-CC then only lll-CC, lll.
296
297 Note that lll is only ISO 639-1/2 alpha code and CC is only ISO 3166
298 alpha code. If the region can not be expressed as ISO 3166 then no -CC
299 tags are included.
300
301 @param bIncludeFullBcp47
302 If TRUE, the full BCP 47 tag is included as first element.
303 If FALSE, the full tag is not included; used if the caller
304 obtains the fallbacks only if the full tag did not lead to a
305 match, so subsequent tries need not to include it again.
306 */
307 ::std::vector< OUString > getFallbackStrings( bool bIncludeFullBcp47 ) const;
308
309
310 /** @short Search for an equal or at least for a similar locale in a list
311 of possible ones.
312
313 @descr First search for a locale that is equal to the reference
314 locale. (means: same BCP47 string)
315
316 If the reference locale could not be located, check for
317 "similar" locales, in the same order as obtained by
318 getFallbackStrings().
319
320 If no similar locale could be located, we search for a locale
321 "en-US" inside the given locale list.
322
323 If "en-US" could not be located, we search for a locale "en"
324 inside the given list.
325
326 If no "same" nor any "similar" locale could be found, we try
327 "x-default" and "x-no-translate" explicitly. Sometimes
328 variables don't use real localization. For example, in case the
329 localized value is a fix product name.
330
331 If no locale matched until then, we use any other locale that
332 exists inside the set of given ones, namely the first
333 encountered!
334
335 @param rList
336 the vector of possible locales as BCP47 strings.
337
338 @param rReference
339 the reference locale, BCP47 string.
340
341 @return An iterator that points to the found element inside the given
342 locale list. If no matching locale could be found it points to
343 the beginning of the list.
344 */
345 static ::std::vector< OUString >::const_iterator getFallback( const ::std::vector< OUString > & rList,
346 const OUString & rReference );
347
348
349 /** @short Search for an equal or for a similar locale in a list
350 of possible ones where at least the language matches.
351
352 @descr First search for a locale that is equal to the reference
353 locale.
354
355 If the reference locale could not be located, check for
356 "similar" locales, in the same order as obtained by
357 getFallbackStrings().
358
359 If no locale matches, rList.end() is returned.
360
361 @param rList
362 the vector of possible locales.
363
364 @param rReference
365 the reference locale.
366
367 @return An iterator that points to the found element inside the given
368 locale list. If no matching locale could be found it points to
369 the end of the list.
370 */
371 static ::std::vector< com::sun::star::lang::Locale >::const_iterator getMatchingFallback(
372 const ::std::vector< com::sun::star::lang::Locale > & rList,
373 const com::sun::star::lang::Locale & rReference );
374
375
376 /** Test equality of two LanguageTag, possibly resolving system locale.
377
378 @param bResolveSystem
379 If TRUE, resolve empty language tags denoting the system
380 locale to the real locale used before comparing.
381 If FALSE, the behavior is identical to operator==(), system
382 locales are not resolved first.
383 */
384 bool equals( const LanguageTag & rLanguageTag, bool bResolveSystem = false ) const;
385
386 /** Test equality of two LanguageTag.
387
388 Does NOT resolve system, i.e. if the system locale is en-US
389 LanguageTag("")==LanguageTag("en-US") returns false! Use
390 equals(...,true) instead if system locales shall be resolved.
391 */
392 bool operator==( const LanguageTag & rLanguageTag ) const;
393
394 /** Test inequality of two LanguageTag.
395
396 Does NOT resolve system, i.e. if the system locale is en-US
397 LanguageTag("")!=LanguageTag("en-US") returns true! Use
398 !equals(,...true) instead if system locales shall be resolved.
399 */
400 bool operator!=( const LanguageTag & rLanguageTag ) const;
401
402 /** Test this LanguageTag less than that LanguageTag.
403
404 For sorted containers. Does NOT resolve system.
405 */
406 bool operator<( const LanguageTag & rLanguageTag ) const;
407
408 /** Convert MS-LangID to Locale.
409
410 @param bResolveSystem
411 If TRUE, resolve an empty language tag denoting the system
412 locale to the real locale used.
413 If FALSE, return an empty Locale for such a tag.
414 */
415 static com::sun::star::lang::Locale convertToLocale( LanguageType nLangID, bool bResolveSystem = true );
416
417 /** Convert Locale to MS-LangID.
418
419 @param bResolveSystem
420 If TRUE, resolve an empty language tag denoting the system
421 locale to the real locale used.
422 If FALSE, return LANGUAGE_SYSTEM for such a tag.
423 */
424 static LanguageType convertToLanguageType( const com::sun::star::lang::Locale& rLocale, bool bResolveSystem = true );
425
426 /** Convert MS-LangID to BCP 47 string.
427
428 @param bResolveSystem
429 If TRUE, resolve an empty language tag denoting the system
430 locale to the real locale used.
431 If FALSE, return an empty OUString for such a tag.
432 */
433 static OUString convertToBcp47( LanguageType nLangID, bool bResolveSystem = true );
434
435 /** Convert Locale to BCP 47 string.
436
437 @param bResolveSystem
438 If TRUE, resolve an empty language tag denoting the system
439 locale to the real locale used.
440 If FALSE, return an empty OUString for such a tag.
441 */
442 static OUString convertToBcp47( const com::sun::star::lang::Locale& rLocale, bool bResolveSystem = true );
443
444 /** Convert BCP 47 string to Locale, convenience method.
445
446 NOTE: exists only for consistency with the other convertTo...()
447 methods, internally uses a temporary LanguageTag instance for
448 conversion so does not save anything compared to
449 LanguageTag(rBcp47).getLocale(bResolveSystem).
450
451 @param bResolveSystem
452 If TRUE, resolve an empty language tag denoting the system
453 locale to the real locale used.
454 If FALSE, return an empty Locale for such a tag.
455 */
456 static com::sun::star::lang::Locale convertToLocale( const OUString& rBcp47, bool bResolveSystem = true );
457
458 /** Convert BCP 47 string to MS-LangID, convenience method.
459
460 NOTE: exists only for consistency with the other convertTo...()
461 methods, internally uses a temporary LanguageTag instance for
462 conversion so does not save anything compared to
463 LanguageTag(rBcp47).getLanguageType(bResolveSystem).
464
465 @param bResolveSystem
466 If TRUE, resolve an empty language tag denoting the system
467 locale to the real locale used.
468 If FALSE, return LANGUAGE_SYSTEM for such a tag.
469 */
470 static LanguageType convertToLanguageType( const OUString& rBcp47, bool bResolveSystem = true );
471
472 /** Convert BCP 47 string to MS-LangID with fallback, convenience method.
473
474 NOTE: exists only for consistency with the other convertTo...()
475 methods, internally uses a temporary LanguageTag instance for
476 conversion so does not save anything compared to
477 LanguageTag(rBcp47).makeFallback().getLanguageType(bResolveSystem).
478
479 @see makeFallback()
480
481 Always resolves an empty tag to the system locale.
482 */
483 static LanguageType convertToLanguageTypeWithFallback( const OUString& rBcp47 );
484
485 /** Convert BCP 47 string to Locale with fallback, convenience method.
486
487 NOTE: exists only for consistency with the other convertTo...()
488 methods, internally uses a temporary LanguageTag instance for
489 conversion so does not save anything compared to
490 LanguageTag(rBcp47).makeFallback().getLocale(bResolveSystem).
491
492 @see makeFallback()
493
494 Always resolves an empty tag to the system locale.
495 */
496 static com::sun::star::lang::Locale convertToLocaleWithFallback( const OUString& rBcp47 );
497
498 /** If nLang is a generated on-the-fly LangID */
499 static bool isOnTheFlyID( LanguageType nLang );
500
501 /** @ATTENTION: _ONLY_ to be called by the application's configuration! */
502 static void setConfiguredSystemLanguage( LanguageType nLang );
503
504 typedef ::boost::shared_ptr< LanguageTagImpl > ImplPtr;
505
506private:
507
508 mutable com::sun::star::lang::Locale maLocale;
509 mutable OUString maBcp47;
510 mutable LanguageType mnLangID;
511 mutable ImplPtr mpImpl;
512 bool mbSystemLocale : 1;
513 mutable bool mbInitializedBcp47 : 1;
514 mutable bool mbInitializedLocale : 1;
515 mutable bool mbInitializedLangID : 1;
516 bool mbIsFallback : 1;
517
518 ImplPtr getImpl() const;
519 ImplPtr registerImpl() const;
520 void syncFromImpl();
521 void syncVarsFromRawImpl() const;
522 void syncVarsFromImpl() const;
523
524 void convertLocaleToBcp47();
525 void convertLocaleToLang();
526 void convertBcp47ToLocale();
527 void convertBcp47ToLang();
528 void convertLangToLocale();
529 void convertLangToBcp47();
530
531 void convertFromRtlLocale();
532
533 /** Canonicalize if not yet done and synchronize initialized conversions.
534
535 @return whether BCP 47 language tag string was changed.
536 */
537 bool synCanonicalize();
538
539 void resetVars();
540
541 static bool isIsoLanguage( const OUString& rLanguage );
542 static bool isIsoScript( const OUString& rScript );
543 static bool isIsoCountry( const OUString& rRegion );
544
545};
546
#endif // INCLUDED_I18NLANGTAG_LANGUAGETAG_HXX

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
