locale.py 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931
  1. """ Locale support.
  2. The module provides low-level access to the C lib's locale APIs
  3. and adds high level number formatting APIs as well as a locale
  4. aliasing engine to complement these.
  5. The aliasing engine includes support for many commonly used locale
  6. names and maps them to values suitable for passing to the C lib's
  7. setlocale() function. It also includes default encodings for all
  8. supported locale names.
  9. """
  10. import sys
  11. # Try importing the _locale module.
  12. #
  13. # If this fails, fall back on a basic 'C' locale emulation.
  14. # Yuck: LC_MESSAGES is non-standard: can't tell whether it exists before
  15. # trying the import. So __all__ is also fiddled at the end of the file.
  16. __all__ = ["setlocale","Error","localeconv","strcoll","strxfrm",
  17. "format","str","atof","atoi","LC_CTYPE","LC_COLLATE",
  18. "LC_TIME","LC_MONETARY","LC_NUMERIC", "LC_ALL","CHAR_MAX"]
  19. try:
  20. from _locale import *
  21. except ImportError:
  22. # Locale emulation
  23. CHAR_MAX = 127
  24. LC_ALL = 6
  25. LC_COLLATE = 3
  26. LC_CTYPE = 0
  27. LC_MESSAGES = 5
  28. LC_MONETARY = 4
  29. LC_NUMERIC = 1
  30. LC_TIME = 2
  31. Error = ValueError
  32. def localeconv():
  33. """ localeconv() -> dict.
  34. Returns numeric and monetary locale-specific parameters.
  35. """
  36. # 'C' locale default values
  37. return {'grouping': [127],
  38. 'currency_symbol': '',
  39. 'n_sign_posn': 127,
  40. 'p_cs_precedes': 127,
  41. 'n_cs_precedes': 127,
  42. 'mon_grouping': [],
  43. 'n_sep_by_space': 127,
  44. 'decimal_point': '.',
  45. 'negative_sign': '',
  46. 'positive_sign': '',
  47. 'p_sep_by_space': 127,
  48. 'int_curr_symbol': '',
  49. 'p_sign_posn': 127,
  50. 'thousands_sep': '',
  51. 'mon_thousands_sep': '',
  52. 'frac_digits': 127,
  53. 'mon_decimal_point': '',
  54. 'int_frac_digits': 127}
  55. def setlocale(category, value=None):
  56. """ setlocale(integer,string=None) -> string.
  57. Activates/queries locale processing.
  58. """
  59. if value not in (None, '', 'C'):
  60. raise Error, '_locale emulation only supports "C" locale'
  61. return 'C'
  62. def strcoll(a,b):
  63. """ strcoll(string,string) -> int.
  64. Compares two strings according to the locale.
  65. """
  66. return cmp(a,b)
  67. def strxfrm(s):
  68. """ strxfrm(string) -> string.
  69. Returns a string that behaves for cmp locale-aware.
  70. """
  71. return s
  72. ### Number formatting APIs
  73. # Author: Martin von Loewis
  74. #perform the grouping from right to left
  75. def _group(s):
  76. conv=localeconv()
  77. grouping=conv['grouping']
  78. if not grouping:return (s, 0)
  79. result=""
  80. seps = 0
  81. spaces = ""
  82. if s[-1] == ' ':
  83. sp = s.find(' ')
  84. spaces = s[sp:]
  85. s = s[:sp]
  86. while s and grouping:
  87. # if grouping is -1, we are done
  88. if grouping[0]==CHAR_MAX:
  89. break
  90. # 0: re-use last group ad infinitum
  91. elif grouping[0]!=0:
  92. #process last group
  93. group=grouping[0]
  94. grouping=grouping[1:]
  95. if result:
  96. result=s[-group:]+conv['thousands_sep']+result
  97. seps += 1
  98. else:
  99. result=s[-group:]
  100. s=s[:-group]
  101. if s and s[-1] not in "0123456789":
  102. # the leading string is only spaces and signs
  103. return s+result+spaces,seps
  104. if not result:
  105. return s+spaces,seps
  106. if s:
  107. result=s+conv['thousands_sep']+result
  108. seps += 1
  109. return result+spaces,seps
  110. def format(f,val,grouping=0):
  111. """Formats a value in the same way that the % formatting would use,
  112. but takes the current locale into account.
  113. Grouping is applied if the third parameter is true."""
  114. result = f % val
  115. fields = result.split(".")
  116. seps = 0
  117. if grouping:
  118. fields[0],seps=_group(fields[0])
  119. if len(fields)==2:
  120. result = fields[0]+localeconv()['decimal_point']+fields[1]
  121. elif len(fields)==1:
  122. result = fields[0]
  123. else:
  124. raise Error, "Too many decimal points in result string"
  125. while seps:
  126. # If the number was formatted for a specific width, then it
  127. # might have been filled with spaces to the left or right. If
  128. # so, kill as much spaces as there where separators.
  129. # Leading zeroes as fillers are not yet dealt with, as it is
  130. # not clear how they should interact with grouping.
  131. sp = result.find(" ")
  132. if sp==-1:break
  133. result = result[:sp]+result[sp+1:]
  134. seps -= 1
  135. return result
  136. def str(val):
  137. """Convert float to integer, taking the locale into account."""
  138. return format("%.12g",val)
  139. def atof(string,func=float):
  140. "Parses a string as a float according to the locale settings."
  141. #First, get rid of the grouping
  142. ts = localeconv()['thousands_sep']
  143. if ts:
  144. string = string.replace(ts, '')
  145. #next, replace the decimal point with a dot
  146. dd = localeconv()['decimal_point']
  147. if dd:
  148. string = string.replace(dd, '.')
  149. #finally, parse the string
  150. return func(string)
  151. def atoi(str):
  152. "Converts a string to an integer according to the locale settings."
  153. return atof(str, int)
  154. def _test():
  155. setlocale(LC_ALL, "")
  156. #do grouping
  157. s1=format("%d", 123456789,1)
  158. print s1, "is", atoi(s1)
  159. #standard formatting
  160. s1=str(3.14)
  161. print s1, "is", atof(s1)
### Locale name aliasing engine

# Author: Marc-Andre Lemburg, mal@lemburg.com
# Various tweaks by Fredrik Lundh <fredrik@pythonware.com>
  165. # store away the low-level version of setlocale (it's
  166. # overridden below)
  167. _setlocale = setlocale
  168. def normalize(localename):
  169. """ Returns a normalized locale code for the given locale
  170. name.
  171. The returned locale code is formatted for use with
  172. setlocale().
  173. If normalization fails, the original name is returned
  174. unchanged.
  175. If the given encoding is not known, the function defaults to
  176. the default encoding for the locale code just like setlocale()
  177. does.
  178. """
  179. # Normalize the locale name and extract the encoding
  180. fullname = localename.lower()
  181. if ':' in fullname:
  182. # ':' is sometimes used as encoding delimiter.
  183. fullname = fullname.replace(':', '.')
  184. if '.' in fullname:
  185. langname, encoding = fullname.split('.')[:2]
  186. fullname = langname + '.' + encoding
  187. else:
  188. langname = fullname
  189. encoding = ''
  190. # First lookup: fullname (possibly with encoding)
  191. code = locale_alias.get(fullname, None)
  192. if code is not None:
  193. return code
  194. # Second try: langname (without encoding)
  195. code = locale_alias.get(langname, None)
  196. if code is not None:
  197. if '.' in code:
  198. langname, defenc = code.split('.')
  199. else:
  200. langname = code
  201. defenc = ''
  202. if encoding:
  203. encoding = encoding_alias.get(encoding, encoding)
  204. else:
  205. encoding = defenc
  206. if encoding:
  207. return langname + '.' + encoding
  208. else:
  209. return langname
  210. else:
  211. return localename
  212. def _parse_localename(localename):
  213. """ Parses the locale code for localename and returns the
  214. result as tuple (language code, encoding).
  215. The localename is normalized and passed through the locale
  216. alias engine. A ValueError is raised in case the locale name
  217. cannot be parsed.
  218. The language code corresponds to RFC 1766. code and encoding
  219. can be None in case the values cannot be determined or are
  220. unknown to this implementation.
  221. """
  222. code = normalize(localename)
  223. if '@' in code:
  224. # Deal with locale modifiers
  225. code, modifier = code.split('@')
  226. if modifier == 'euro' and '.' not in code:
  227. # Assume Latin-9 for @euro locales. This is bogus,
  228. # since some systems may use other encodings for these
  229. # locales. Also, we ignore other modifiers.
  230. return code, 'iso-8859-15'
  231. if '.' in code:
  232. return tuple(code.split('.')[:2])
  233. elif code == 'C':
  234. return None, None
  235. raise ValueError, 'unknown locale: %s' % localename
  236. def _build_localename(localetuple):
  237. """ Builds a locale code from the given tuple (language code,
  238. encoding).
  239. No aliasing or normalizing takes place.
  240. """
  241. language, encoding = localetuple
  242. if language is None:
  243. language = 'C'
  244. if encoding is None:
  245. return language
  246. else:
  247. return language + '.' + encoding
  248. def getdefaultlocale(envvars=('LC_ALL', 'LC_CTYPE', 'LANG', 'LANGUAGE')):
  249. """ Tries to determine the default locale settings and returns
  250. them as tuple (language code, encoding).
  251. According to POSIX, a program which has not called
  252. setlocale(LC_ALL, "") runs using the portable 'C' locale.
  253. Calling setlocale(LC_ALL, "") lets it use the default locale as
  254. defined by the LANG variable. Since we don't want to interfere
  255. with the current locale setting we thus emulate the behavior
  256. in the way described above.
  257. To maintain compatibility with other platforms, not only the
  258. LANG variable is tested, but a list of variables given as
  259. envvars parameter. The first found to be defined will be
  260. used. envvars defaults to the search path used in GNU gettext;
  261. it must always contain the variable name 'LANG'.
  262. Except for the code 'C', the language code corresponds to RFC
  263. 1766. code and encoding can be None in case the values cannot
  264. be determined.
  265. """
  266. try:
  267. # check if it's supported by the _locale module
  268. import _locale
  269. code, encoding = _locale._getdefaultlocale()
  270. except (ImportError, AttributeError):
  271. pass
  272. else:
  273. # make sure the code/encoding values are valid
  274. if sys.platform == "win32" and code and code[:2] == "0x":
  275. # map windows language identifier to language name
  276. code = windows_locale.get(int(code, 0))
  277. # ...add other platform-specific processing here, if
  278. # necessary...
  279. return code, encoding
  280. # fall back on POSIX behaviour
  281. import os
  282. lookup = os.environ.get
  283. for variable in envvars:
  284. localename = lookup(variable,None)
  285. if localename:
  286. if variable == 'LANGUAGE':
  287. localename = localename.split(':')[0]
  288. break
  289. else:
  290. localename = 'C'
  291. return _parse_localename(localename)
  292. def getlocale(category=LC_CTYPE):
  293. """ Returns the current setting for the given locale category as
  294. tuple (language code, encoding).
  295. category may be one of the LC_* value except LC_ALL. It
  296. defaults to LC_CTYPE.
  297. Except for the code 'C', the language code corresponds to RFC
  298. 1766. code and encoding can be None in case the values cannot
  299. be determined.
  300. """
  301. localename = _setlocale(category)
  302. if category == LC_ALL and ';' in localename:
  303. raise TypeError, 'category LC_ALL is not supported'
  304. return _parse_localename(localename)
  305. def setlocale(category, locale=None):
  306. """ Set the locale for the given category. The locale can be
  307. a string, a locale tuple (language code, encoding), or None.
  308. Locale tuples are converted to strings the locale aliasing
  309. engine. Locale strings are passed directly to the C lib.
  310. category may be given as one of the LC_* values.
  311. """
  312. if locale and type(locale) is not type(""):
  313. # convert to string
  314. locale = normalize(_build_localename(locale))
  315. return _setlocale(category, locale)
  316. def resetlocale(category=LC_ALL):
  317. """ Sets the locale for category to the default setting.
  318. The default setting is determined by calling
  319. getdefaultlocale(). category defaults to LC_ALL.
  320. """
  321. _setlocale(category, _build_localename(getdefaultlocale()))
  322. if sys.platform in ('win32', 'darwin', 'mac'):
  323. # On Win32, this will return the ANSI code page
  324. # On the Mac, it should return the system encoding;
  325. # it might return "ascii" instead
  326. def getpreferredencoding(do_setlocale = True):
  327. """Return the charset that the user is likely using."""
  328. import _locale
  329. return _locale._getdefaultlocale()[1]
  330. else:
  331. # On Unix, if CODESET is available, use that.
  332. try:
  333. CODESET
  334. except NameError:
  335. # Fall back to parsing environment variables :-(
  336. def getpreferredencoding(do_setlocale = True):
  337. """Return the charset that the user is likely using,
  338. by looking at environment variables."""
  339. return getdefaultlocale()[1]
  340. else:
  341. def getpreferredencoding(do_setlocale = True):
  342. """Return the charset that the user is likely using,
  343. according to the system configuration."""
  344. if do_setlocale:
  345. oldloc = setlocale(LC_CTYPE)
  346. setlocale(LC_CTYPE, "")
  347. result = nl_langinfo(CODESET)
  348. setlocale(LC_CTYPE, oldloc)
  349. return result
  350. else:
  351. return nl_langinfo(CODESET)
  352. ### Database
  353. #
  354. # The following data was extracted from the locale.alias file which
  355. # comes with X11 and then hand edited removing the explicit encoding
  356. # definitions and adding some more aliases. The file is usually
  357. # available as /usr/lib/X11/locale/locale.alias.
  358. #
  359. #
  360. # The encoding_alias table maps lowercase encoding alias names to C
  361. # locale encoding names (case-sensitive).
  362. #
  363. encoding_alias = {
  364. '437': 'C',
  365. 'c': 'C',
  366. 'iso8859': 'ISO8859-1',
  367. '8859': 'ISO8859-1',
  368. '88591': 'ISO8859-1',
  369. 'ascii': 'ISO8859-1',
  370. 'en': 'ISO8859-1',
  371. 'iso88591': 'ISO8859-1',
  372. 'iso_8859-1': 'ISO8859-1',
  373. '885915': 'ISO8859-15',
  374. 'iso885915': 'ISO8859-15',
  375. 'iso_8859-15': 'ISO8859-15',
  376. 'iso8859-2': 'ISO8859-2',
  377. 'iso88592': 'ISO8859-2',
  378. 'iso_8859-2': 'ISO8859-2',
  379. 'iso88595': 'ISO8859-5',
  380. 'iso88596': 'ISO8859-6',
  381. 'iso88597': 'ISO8859-7',
  382. 'iso88598': 'ISO8859-8',
  383. 'iso88599': 'ISO8859-9',
  384. 'iso-2022-jp': 'JIS7',
  385. 'jis': 'JIS7',
  386. 'jis7': 'JIS7',
  387. 'sjis': 'SJIS',
  388. 'tis620': 'TACTIS',
  389. 'ajec': 'eucJP',
  390. 'eucjp': 'eucJP',
  391. 'ujis': 'eucJP',
  392. 'utf-8': 'utf',
  393. 'utf8': 'utf',
  394. 'utf8@ucs4': 'utf',
  395. }
  396. #
  397. # The locale_alias table maps lowercase alias names to C locale names
  398. # (case-sensitive). Encodings are always separated from the locale
  399. # name using a dot ('.'); they should only be given in case the
  400. # language name is needed to interpret the given encoding alias
  401. # correctly (CJK codes often have this need).
  402. #
  403. locale_alias = {
  404. 'american': 'en_US.ISO8859-1',
  405. 'ar': 'ar_AA.ISO8859-6',
  406. 'ar_aa': 'ar_AA.ISO8859-6',
  407. 'ar_sa': 'ar_SA.ISO8859-6',
  408. 'arabic': 'ar_AA.ISO8859-6',
  409. 'bg': 'bg_BG.ISO8859-5',
  410. 'bg_bg': 'bg_BG.ISO8859-5',
  411. 'bulgarian': 'bg_BG.ISO8859-5',
  412. 'c-french': 'fr_CA.ISO8859-1',
  413. 'c': 'C',
  414. 'c_c': 'C',
  415. 'cextend': 'en_US.ISO8859-1',
  416. 'chinese-s': 'zh_CN.eucCN',
  417. 'chinese-t': 'zh_TW.eucTW',
  418. 'croatian': 'hr_HR.ISO8859-2',
  419. 'cs': 'cs_CZ.ISO8859-2',
  420. 'cs_cs': 'cs_CZ.ISO8859-2',
  421. 'cs_cz': 'cs_CZ.ISO8859-2',
  422. 'cz': 'cz_CZ.ISO8859-2',
  423. 'cz_cz': 'cz_CZ.ISO8859-2',
  424. 'czech': 'cs_CS.ISO8859-2',
  425. 'da': 'da_DK.ISO8859-1',
  426. 'da_dk': 'da_DK.ISO8859-1',
  427. 'danish': 'da_DK.ISO8859-1',
  428. 'de': 'de_DE.ISO8859-1',
  429. 'de_at': 'de_AT.ISO8859-1',
  430. 'de_ch': 'de_CH.ISO8859-1',
  431. 'de_de': 'de_DE.ISO8859-1',
  432. 'dutch': 'nl_BE.ISO8859-1',
  433. 'ee': 'ee_EE.ISO8859-4',
  434. 'el': 'el_GR.ISO8859-7',
  435. 'el_gr': 'el_GR.ISO8859-7',
  436. 'en': 'en_US.ISO8859-1',
  437. 'en_au': 'en_AU.ISO8859-1',
  438. 'en_ca': 'en_CA.ISO8859-1',
  439. 'en_gb': 'en_GB.ISO8859-1',
  440. 'en_ie': 'en_IE.ISO8859-1',
  441. 'en_nz': 'en_NZ.ISO8859-1',
  442. 'en_uk': 'en_GB.ISO8859-1',
  443. 'en_us': 'en_US.ISO8859-1',
  444. 'eng_gb': 'en_GB.ISO8859-1',
  445. 'english': 'en_EN.ISO8859-1',
  446. 'english_uk': 'en_GB.ISO8859-1',
  447. 'english_united-states': 'en_US.ISO8859-1',
  448. 'english_us': 'en_US.ISO8859-1',
  449. 'es': 'es_ES.ISO8859-1',
  450. 'es_ar': 'es_AR.ISO8859-1',
  451. 'es_bo': 'es_BO.ISO8859-1',
  452. 'es_cl': 'es_CL.ISO8859-1',
  453. 'es_co': 'es_CO.ISO8859-1',
  454. 'es_cr': 'es_CR.ISO8859-1',
  455. 'es_ec': 'es_EC.ISO8859-1',
  456. 'es_es': 'es_ES.ISO8859-1',
  457. 'es_gt': 'es_GT.ISO8859-1',
  458. 'es_mx': 'es_MX.ISO8859-1',
  459. 'es_ni': 'es_NI.ISO8859-1',
  460. 'es_pa': 'es_PA.ISO8859-1',
  461. 'es_pe': 'es_PE.ISO8859-1',
  462. 'es_py': 'es_PY.ISO8859-1',
  463. 'es_sv': 'es_SV.ISO8859-1',
  464. 'es_uy': 'es_UY.ISO8859-1',
  465. 'es_ve': 'es_VE.ISO8859-1',
  466. 'et': 'et_EE.ISO8859-4',
  467. 'et_ee': 'et_EE.ISO8859-4',
  468. 'fi': 'fi_FI.ISO8859-1',
  469. 'fi_fi': 'fi_FI.ISO8859-1',
  470. 'finnish': 'fi_FI.ISO8859-1',
  471. 'fr': 'fr_FR.ISO8859-1',
  472. 'fr_be': 'fr_BE.ISO8859-1',
  473. 'fr_ca': 'fr_CA.ISO8859-1',
  474. 'fr_ch': 'fr_CH.ISO8859-1',
  475. 'fr_fr': 'fr_FR.ISO8859-1',
  476. 'fre_fr': 'fr_FR.ISO8859-1',
  477. 'french': 'fr_FR.ISO8859-1',
  478. 'french_france': 'fr_FR.ISO8859-1',
  479. 'ger_de': 'de_DE.ISO8859-1',
  480. 'german': 'de_DE.ISO8859-1',
  481. 'german_germany': 'de_DE.ISO8859-1',
  482. 'greek': 'el_GR.ISO8859-7',
  483. 'hebrew': 'iw_IL.ISO8859-8',
  484. 'hr': 'hr_HR.ISO8859-2',
  485. 'hr_hr': 'hr_HR.ISO8859-2',
  486. 'hu': 'hu_HU.ISO8859-2',
  487. 'hu_hu': 'hu_HU.ISO8859-2',
  488. 'hungarian': 'hu_HU.ISO8859-2',
  489. 'icelandic': 'is_IS.ISO8859-1',
  490. 'id': 'id_ID.ISO8859-1',
  491. 'id_id': 'id_ID.ISO8859-1',
  492. 'is': 'is_IS.ISO8859-1',
  493. 'is_is': 'is_IS.ISO8859-1',
  494. 'iso-8859-1': 'en_US.ISO8859-1',
  495. 'iso-8859-15': 'en_US.ISO8859-15',
  496. 'iso8859-1': 'en_US.ISO8859-1',
  497. 'iso8859-15': 'en_US.ISO8859-15',
  498. 'iso_8859_1': 'en_US.ISO8859-1',
  499. 'iso_8859_15': 'en_US.ISO8859-15',
  500. 'it': 'it_IT.ISO8859-1',
  501. 'it_ch': 'it_CH.ISO8859-1',
  502. 'it_it': 'it_IT.ISO8859-1',
  503. 'italian': 'it_IT.ISO8859-1',
  504. 'iw': 'iw_IL.ISO8859-8',
  505. 'iw_il': 'iw_IL.ISO8859-8',
  506. 'ja': 'ja_JP.eucJP',
  507. 'ja.jis': 'ja_JP.JIS7',
  508. 'ja.sjis': 'ja_JP.SJIS',
  509. 'ja_jp': 'ja_JP.eucJP',
  510. 'ja_jp.ajec': 'ja_JP.eucJP',
  511. 'ja_jp.euc': 'ja_JP.eucJP',
  512. 'ja_jp.eucjp': 'ja_JP.eucJP',
  513. 'ja_jp.iso-2022-jp': 'ja_JP.JIS7',
  514. 'ja_jp.jis': 'ja_JP.JIS7',
  515. 'ja_jp.jis7': 'ja_JP.JIS7',
  516. 'ja_jp.mscode': 'ja_JP.SJIS',
  517. 'ja_jp.sjis': 'ja_JP.SJIS',
  518. 'ja_jp.ujis': 'ja_JP.eucJP',
  519. 'japan': 'ja_JP.eucJP',
  520. 'japanese': 'ja_JP.SJIS',
  521. 'japanese-euc': 'ja_JP.eucJP',
  522. 'japanese.euc': 'ja_JP.eucJP',
  523. 'jp_jp': 'ja_JP.eucJP',
  524. 'ko': 'ko_KR.eucKR',
  525. 'ko_kr': 'ko_KR.eucKR',
  526. 'ko_kr.euc': 'ko_KR.eucKR',
  527. 'korean': 'ko_KR.eucKR',
  528. 'lt': 'lt_LT.ISO8859-4',
  529. 'lv': 'lv_LV.ISO8859-4',
  530. 'mk': 'mk_MK.ISO8859-5',
  531. 'mk_mk': 'mk_MK.ISO8859-5',
  532. 'nl': 'nl_NL.ISO8859-1',
  533. 'nl_be': 'nl_BE.ISO8859-1',
  534. 'nl_nl': 'nl_NL.ISO8859-1',
  535. 'no': 'no_NO.ISO8859-1',
  536. 'no_no': 'no_NO.ISO8859-1',
  537. 'norwegian': 'no_NO.ISO8859-1',
  538. 'pl': 'pl_PL.ISO8859-2',
  539. 'pl_pl': 'pl_PL.ISO8859-2',
  540. 'polish': 'pl_PL.ISO8859-2',
  541. 'portuguese': 'pt_PT.ISO8859-1',
  542. 'portuguese_brazil': 'pt_BR.ISO8859-1',
  543. 'posix': 'C',
  544. 'posix-utf2': 'C',
  545. 'pt': 'pt_PT.ISO8859-1',
  546. 'pt_br': 'pt_BR.ISO8859-1',
  547. 'pt_pt': 'pt_PT.ISO8859-1',
  548. 'ro': 'ro_RO.ISO8859-2',
  549. 'ro_ro': 'ro_RO.ISO8859-2',
  550. 'ru': 'ru_RU.ISO8859-5',
  551. 'ru_ru': 'ru_RU.ISO8859-5',
  552. 'rumanian': 'ro_RO.ISO8859-2',
  553. 'russian': 'ru_RU.ISO8859-5',
  554. 'serbocroatian': 'sh_YU.ISO8859-2',
  555. 'sh': 'sh_YU.ISO8859-2',
  556. 'sh_hr': 'sh_HR.ISO8859-2',
  557. 'sh_sp': 'sh_YU.ISO8859-2',
  558. 'sh_yu': 'sh_YU.ISO8859-2',
  559. 'sk': 'sk_SK.ISO8859-2',
  560. 'sk_sk': 'sk_SK.ISO8859-2',
  561. 'sl': 'sl_CS.ISO8859-2',
  562. 'sl_cs': 'sl_CS.ISO8859-2',
  563. 'sl_si': 'sl_SI.ISO8859-2',
  564. 'slovak': 'sk_SK.ISO8859-2',
  565. 'slovene': 'sl_CS.ISO8859-2',
  566. 'sp': 'sp_YU.ISO8859-5',
  567. 'sp_yu': 'sp_YU.ISO8859-5',
  568. 'spanish': 'es_ES.ISO8859-1',
  569. 'spanish_spain': 'es_ES.ISO8859-1',
  570. 'sr_sp': 'sr_SP.ISO8859-2',
  571. 'sv': 'sv_SE.ISO8859-1',
  572. 'sv_se': 'sv_SE.ISO8859-1',
  573. 'swedish': 'sv_SE.ISO8859-1',
  574. 'th_th': 'th_TH.TACTIS',
  575. 'tr': 'tr_TR.ISO8859-9',
  576. 'tr_tr': 'tr_TR.ISO8859-9',
  577. 'turkish': 'tr_TR.ISO8859-9',
  578. 'univ': 'en_US.utf',
  579. 'universal': 'en_US.utf',
  580. 'zh': 'zh_CN.eucCN',
  581. 'zh_cn': 'zh_CN.eucCN',
  582. 'zh_cn.big5': 'zh_TW.eucTW',
  583. 'zh_cn.euc': 'zh_CN.eucCN',
  584. 'zh_tw': 'zh_TW.eucTW',
  585. 'zh_tw.euc': 'zh_TW.eucTW',
  586. }
  587. #
  588. # This maps Windows language identifiers to locale strings.
  589. #
  590. # This list has been updated from
  591. # http://msdn.microsoft.com/library/default.asp?url=/library/en-us/intl/nls_238z.asp
  592. # to include every locale up to Windows XP.
  593. #
  594. # NOTE: this mapping is incomplete. If your language is missing, please
  595. # submit a bug report to Python bug manager, which you can find via:
  596. # http://www.python.org/dev/
  597. # Make sure you include the missing language identifier and the suggested
  598. # locale code.
  599. #
  600. windows_locale = {
  601. 0x0436: "af_ZA", # Afrikaans
  602. 0x041c: "sq_AL", # Albanian
  603. 0x0401: "ar_SA", # Arabic - Saudi Arabia
  604. 0x0801: "ar_IQ", # Arabic - Iraq
  605. 0x0c01: "ar_EG", # Arabic - Egypt
  606. 0x1001: "ar_LY", # Arabic - Libya
  607. 0x1401: "ar_DZ", # Arabic - Algeria
  608. 0x1801: "ar_MA", # Arabic - Morocco
  609. 0x1c01: "ar_TN", # Arabic - Tunisia
  610. 0x2001: "ar_OM", # Arabic - Oman
  611. 0x2401: "ar_YE", # Arabic - Yemen
  612. 0x2801: "ar_SY", # Arabic - Syria
  613. 0x2c01: "ar_JO", # Arabic - Jordan
  614. 0x3001: "ar_LB", # Arabic - Lebanon
  615. 0x3401: "ar_KW", # Arabic - Kuwait
  616. 0x3801: "ar_AE", # Arabic - United Arab Emirates
  617. 0x3c01: "ar_BH", # Arabic - Bahrain
  618. 0x4001: "ar_QA", # Arabic - Qatar
  619. 0x042b: "hy_AM", # Armenian
  620. 0x042c: "az_AZ", # Azeri Latin
  621. 0x082c: "az_AZ", # Azeri - Cyrillic
  622. 0x042d: "eu_ES", # Basque
  623. 0x0423: "be_BY", # Belarusian
  624. 0x0445: "bn_IN", # Begali
  625. 0x201a: "bs_BA", # Bosnian
  626. 0x141a: "bs_BA", # Bosnian - Cyrillic
  627. 0x047e: "br_FR", # Breton - France
  628. 0x0402: "bg_BG", # Bulgarian
  629. 0x0403: "ca_ES", # Catalan
  630. 0x0004: "zh_CHS",# Chinese - Simplified
  631. 0x0404: "zh_TW", # Chinese - Taiwan
  632. 0x0804: "zh_CN", # Chinese - PRC
  633. 0x0c04: "zh_HK", # Chinese - Hong Kong S.A.R.
  634. 0x1004: "zh_SG", # Chinese - Singapore
  635. 0x1404: "zh_MO", # Chinese - Macao S.A.R.
  636. 0x7c04: "zh_CHT",# Chinese - Traditional
  637. 0x041a: "hr_HR", # Croatian
  638. 0x101a: "hr_BA", # Croatian - Bosnia
  639. 0x0405: "cs_CZ", # Czech
  640. 0x0406: "da_DK", # Danish
  641. 0x048c: "gbz_AF",# Dari - Afghanistan
  642. 0x0465: "div_MV",# Divehi - Maldives
  643. 0x0413: "nl_NL", # Dutch - The Netherlands
  644. 0x0813: "nl_BE", # Dutch - Belgium
  645. 0x0409: "en_US", # English - United States
  646. 0x0809: "en_GB", # English - United Kingdom
  647. 0x0c09: "en_AU", # English - Australia
  648. 0x1009: "en_CA", # English - Canada
  649. 0x1409: "en_NZ", # English - New Zealand
  650. 0x1809: "en_IE", # English - Ireland
  651. 0x1c09: "en_ZA", # English - South Africa
  652. 0x2009: "en_JA", # English - Jamaica
  653. 0x2409: "en_CB", # English - Carribbean
  654. 0x2809: "en_BZ", # English - Belize
  655. 0x2c09: "en_TT", # English - Trinidad
  656. 0x3009: "en_ZW", # English - Zimbabwe
  657. 0x3409: "en_PH", # English - Phillippines
  658. 0x0425: "et_EE", # Estonian
  659. 0x0438: "fo_FO", # Faroese
  660. 0x0464: "fil_PH",# Filipino
  661. 0x040b: "fi_FI", # Finnish
  662. 0x040c: "fr_FR", # French - France
  663. 0x080c: "fr_BE", # French - Belgium
  664. 0x0c0c: "fr_CA", # French - Canada
  665. 0x100c: "fr_CH", # French - Switzerland
  666. 0x140c: "fr_LU", # French - Luxembourg
  667. 0x180c: "fr_MC", # French - Monaco
  668. 0x0462: "fy_NL", # Frisian - Netherlands
  669. 0x0456: "gl_ES", # Galician
  670. 0x0437: "ka_GE", # Georgian
  671. 0x0407: "de_DE", # German - Germany
  672. 0x0807: "de_CH", # German - Switzerland
  673. 0x0c07: "de_AT", # German - Austria
  674. 0x1007: "de_LU", # German - Luxembourg
  675. 0x1407: "de_LI", # German - Liechtenstein
  676. 0x0408: "el_GR", # Greek
  677. 0x0447: "gu_IN", # Gujarati
  678. 0x040d: "he_IL", # Hebrew
  679. 0x0439: "hi_IN", # Hindi
  680. 0x040e: "hu_HU", # Hungarian
  681. 0x040f: "is_IS", # Icelandic
  682. 0x0421: "id_ID", # Indonesian
  683. 0x045d: "iu_CA", # Inuktitut
  684. 0x085d: "iu_CA", # Inuktitut - Latin
  685. 0x083c: "ga_IE", # Irish - Ireland
  686. 0x0434: "xh_ZA", # Xhosa - South Africa
  687. 0x0435: "zu_ZA", # Zulu
  688. 0x0410: "it_IT", # Italian - Italy
  689. 0x0810: "it_CH", # Italian - Switzerland
  690. 0x0411: "ja_JP", # Japanese
  691. 0x044b: "kn_IN", # Kannada - India
  692. 0x043f: "kk_KZ", # Kazakh
  693. 0x0457: "kok_IN",# Konkani
  694. 0x0412: "ko_KR", # Korean
  695. 0x0440: "ky_KG", # Kyrgyz
  696. 0x0426: "lv_LV", # Latvian
  697. 0x0427: "lt_LT", # Lithuanian
  698. 0x046e: "lb_LU", # Luxembourgish
  699. 0x042f: "mk_MK", # FYRO Macedonian
  700. 0x043e: "ms_MY", # Malay - Malaysia
  701. 0x083e: "ms_BN", # Malay - Brunei
  702. 0x044c: "ml_IN", # Malayalam - India
  703. 0x043a: "mt_MT", # Maltese
  704. 0x0481: "mi_NZ", # Maori
  705. 0x047a: "arn_CL",# Mapudungun
  706. 0x044e: "mr_IN", # Marathi
  707. 0x047c: "moh_CA",# Mohawk - Canada
  708. 0x0450: "mn_MN", # Mongolian
  709. 0x0461: "ne_NP", # Nepali
  710. 0x0414: "nb_NO", # Norwegian - Bokmal
  711. 0x0814: "nn_NO", # Norwegian - Nynorsk
  712. 0x0482: "oc_FR", # Occitan - France
  713. 0x0448: "or_IN", # Oriya - India
  714. 0x0463: "ps_AF", # Pashto - Afghanistan
  715. 0x0429: "fa_IR", # Persian
  716. 0x0415: "pl_PL", # Polish
  717. 0x0416: "pt_BR", # Portuguese - Brazil
  718. 0x0816: "pt_PT", # Portuguese - Portugal
  719. 0x0446: "pa_IN", # Punjabi
  720. 0x046b: "quz_BO",# Quechua (Bolivia)
  721. 0x086b: "quz_EC",# Quechua (Ecuador)
  722. 0x0c6b: "quz_PE",# Quechua (Peru)
  723. 0x0418: "ro_RO", # Romanian - Romania
  724. 0x0417: "rm_CH", # Raeto-Romanese
  725. 0x0419: "ru_RU", # Russian
  726. 0x243b: "smn_FI",# Sami Finland
  727. 0x103b: "smj_NO",# Sami Norway
  728. 0x143b: "smj_SE",# Sami Sweden
  729. 0x043b: "se_NO", # Sami Northern Norway
  730. 0x083b: "se_SE", # Sami Northern Sweden
  731. 0x0c3b: "se_FI", # Sami Northern Finland
  732. 0x203b: "sms_FI",# Sami Skolt
  733. 0x183b: "sma_NO",# Sami Southern Norway
  734. 0x1c3b: "sma_SE",# Sami Southern Sweden
  735. 0x044f: "sa_IN", # Sanskrit
  736. 0x0c1a: "sr_SP", # Serbian - Cyrillic
  737. 0x1c1a: "sr_BA", # Serbian - Bosnia Cyrillic
  738. 0x081a: "sr_SP", # Serbian - Latin
  739. 0x181a: "sr_BA", # Serbian - Bosnia Latin
  740. 0x046c: "ns_ZA", # Northern Sotho
  741. 0x0432: "tn_ZA", # Setswana - Southern Africa
  742. 0x041b: "sk_SK", # Slovak
  743. 0x0424: "sl_SI", # Slovenian
  744. 0x040a: "es_ES", # Spanish - Spain
  745. 0x080a: "es_MX", # Spanish - Mexico
  746. 0x0c0a: "es_ES", # Spanish - Spain (Modern)
  747. 0x100a: "es_GT", # Spanish - Guatemala
  748. 0x140a: "es_CR", # Spanish - Costa Rica
  749. 0x180a: "es_PA", # Spanish - Panama
  750. 0x1c0a: "es_DO", # Spanish - Dominican Republic
  751. 0x200a: "es_VE", # Spanish - Venezuela
  752. 0x240a: "es_CO", # Spanish - Colombia
  753. 0x280a: "es_PE", # Spanish - Peru
  754. 0x2c0a: "es_AR", # Spanish - Argentina
  755. 0x300a: "es_EC", # Spanish - Ecuador
  756. 0x340a: "es_CL", # Spanish - Chile
  757. 0x380a: "es_UR", # Spanish - Uruguay
  758. 0x3c0a: "es_PY", # Spanish - Paraguay
  759. 0x400a: "es_BO", # Spanish - Bolivia
  760. 0x440a: "es_SV", # Spanish - El Salvador
  761. 0x480a: "es_HN", # Spanish - Honduras
  762. 0x4c0a: "es_NI", # Spanish - Nicaragua
  763. 0x500a: "es_PR", # Spanish - Puerto Rico
  764. 0x0441: "sw_KE", # Swahili
  765. 0x041d: "sv_SE", # Swedish - Sweden
  766. 0x081d: "sv_FI", # Swedish - Finland
  767. 0x045a: "syr_SY",# Syriac
  768. 0x0449: "ta_IN", # Tamil
  769. 0x0444: "tt_RU", # Tatar
  770. 0x044a: "te_IN", # Telugu
  771. 0x041e: "th_TH", # Thai
  772. 0x041f: "tr_TR", # Turkish
  773. 0x0422: "uk_UA", # Ukrainian
  774. 0x0420: "ur_PK", # Urdu
  775. 0x0820: "ur_IN", # Urdu - India
  776. 0x0443: "uz_UZ", # Uzbek - Latin
  777. 0x0843: "uz_UZ", # Uzbek - Cyrillic
  778. 0x042a: "vi_VN", # Vietnamese
  779. 0x0452: "cy_GB", # Welsh
  780. }
  781. def _print_locale():
  782. """ Test function.
  783. """
  784. categories = {}
  785. def _init_categories(categories=categories):
  786. for k,v in globals().items():
  787. if k[:3] == 'LC_':
  788. categories[k] = v
  789. _init_categories()
  790. del categories['LC_ALL']
  791. print 'Locale defaults as determined by getdefaultlocale():'
  792. print '-'*72
  793. lang, enc = getdefaultlocale()
  794. print 'Language: ', lang or '(undefined)'
  795. print 'Encoding: ', enc or '(undefined)'
  796. print
  797. print 'Locale settings on startup:'
  798. print '-'*72
  799. for name,category in categories.items():
  800. print name, '...'
  801. lang, enc = getlocale(category)
  802. print ' Language: ', lang or '(undefined)'
  803. print ' Encoding: ', enc or '(undefined)'
  804. print
  805. print
  806. print 'Locale settings after calling resetlocale():'
  807. print '-'*72
  808. resetlocale()
  809. for name,category in categories.items():
  810. print name, '...'
  811. lang, enc = getlocale(category)
  812. print ' Language: ', lang or '(undefined)'
  813. print ' Encoding: ', enc or '(undefined)'
  814. print
  815. try:
  816. setlocale(LC_ALL, "")
  817. except:
  818. print 'NOTE:'
  819. print 'setlocale(LC_ALL, "") does not support the default locale'
  820. print 'given in the OS environment variables.'
  821. else:
  822. print
  823. print 'Locale settings after calling setlocale(LC_ALL, ""):'
  824. print '-'*72
  825. for name,category in categories.items():
  826. print name, '...'
  827. lang, enc = getlocale(category)
  828. print ' Language: ', lang or '(undefined)'
  829. print ' Encoding: ', enc or '(undefined)'
  830. print
  831. ###
  832. try:
  833. LC_MESSAGES
  834. except NameError:
  835. pass
  836. else:
  837. __all__.append("LC_MESSAGES")
  838. if __name__=='__main__':
  839. print 'Locale aliasing:'
  840. print
  841. _print_locale()
  842. print
  843. print 'Number formatting:'
  844. print
  845. _test()