# cp1254.py
""" Python Character Mapping Codec generated from 'CP1254.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
  7. ### Codec APIs
  8. class Codec(codecs.Codec):
  9. def encode(self,input,errors='strict'):
  10. return codecs.charmap_encode(input,errors,encoding_map)
  11. def decode(self,input,errors='strict'):
  12. return codecs.charmap_decode(input,errors,decoding_map)
  13. class StreamWriter(Codec,codecs.StreamWriter):
  14. pass
  15. class StreamReader(Codec,codecs.StreamReader):
  16. pass
  17. ### encodings module API
  18. def getregentry():
  19. return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
  20. ### Decoding Map
  21. decoding_map = codecs.make_identity_dict(range(256))
  22. decoding_map.update({
  23. 0x0080: 0x20ac, # EURO SIGN
  24. 0x0081: None, # UNDEFINED
  25. 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
  26. 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK
  27. 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
  28. 0x0085: 0x2026, # HORIZONTAL ELLIPSIS
  29. 0x0086: 0x2020, # DAGGER
  30. 0x0087: 0x2021, # DOUBLE DAGGER
  31. 0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT
  32. 0x0089: 0x2030, # PER MILLE SIGN
  33. 0x008a: 0x0160, # LATIN CAPITAL LETTER S WITH CARON
  34. 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
  35. 0x008c: 0x0152, # LATIN CAPITAL LIGATURE OE
  36. 0x008d: None, # UNDEFINED
  37. 0x008e: None, # UNDEFINED
  38. 0x008f: None, # UNDEFINED
  39. 0x0090: None, # UNDEFINED
  40. 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK
  41. 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK
  42. 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK
  43. 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK
  44. 0x0095: 0x2022, # BULLET
  45. 0x0096: 0x2013, # EN DASH
  46. 0x0097: 0x2014, # EM DASH
  47. 0x0098: 0x02dc, # SMALL TILDE
  48. 0x0099: 0x2122, # TRADE MARK SIGN
  49. 0x009a: 0x0161, # LATIN SMALL LETTER S WITH CARON
  50. 0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
  51. 0x009c: 0x0153, # LATIN SMALL LIGATURE OE
  52. 0x009d: None, # UNDEFINED
  53. 0x009e: None, # UNDEFINED
  54. 0x009f: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS
  55. 0x00d0: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE
  56. 0x00dd: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE
  57. 0x00de: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA
  58. 0x00f0: 0x011f, # LATIN SMALL LETTER G WITH BREVE
  59. 0x00fd: 0x0131, # LATIN SMALL LETTER DOTLESS I
  60. 0x00fe: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
  61. })
  62. ### Encoding Map
  63. encoding_map = codecs.make_encoding_map(decoding_map)