whoami7 - Manager
:
/
home
/
fresvfqn
/
waterdamagerestorationandrepairsmithtown.com
/
Compressed
/
Upload File:
files >> /home/fresvfqn/waterdamagerestorationandrepairsmithtown.com/Compressed/encodings.tar
charmap.py 0000644 00000004044 15053241622 0006533 0 ustar 00 """ Generic Python Character Mapping Codec. Use this codec directly rather than through the automatic conversion mechanisms supplied by unicode() and .encode(). Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. """#" import codecs ### Codec APIs class Codec(codecs.Codec): # Note: Binding these as C functions will result in the class not # converting them to methods. This is intended. encode = codecs.charmap_encode decode = codecs.charmap_decode class IncrementalEncoder(codecs.IncrementalEncoder): def __init__(self, errors='strict', mapping=None): codecs.IncrementalEncoder.__init__(self, errors) self.mapping = mapping def encode(self, input, final=False): return codecs.charmap_encode(input, self.errors, self.mapping)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def __init__(self, errors='strict', mapping=None): codecs.IncrementalDecoder.__init__(self, errors) self.mapping = mapping def decode(self, input, final=False): return codecs.charmap_decode(input, self.errors, self.mapping)[0] class StreamWriter(Codec,codecs.StreamWriter): def __init__(self,stream,errors='strict',mapping=None): codecs.StreamWriter.__init__(self,stream,errors) self.mapping = mapping def encode(self,input,errors='strict'): return Codec.encode(input,errors,self.mapping) class StreamReader(Codec,codecs.StreamReader): def __init__(self,stream,errors='strict',mapping=None): codecs.StreamReader.__init__(self,stream,errors) self.mapping = mapping def decode(self,input,errors='strict'): return Codec.decode(input,errors,self.mapping) ### encodings module API def getregentry(): return codecs.CodecInfo( name='charmap', encode=Codec.encode, decode=Codec.decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamwriter=StreamWriter, streamreader=StreamReader, ) koi8_t.py 0000644 00000031611 15053241622 0006315 0 ustar 00 """ Python Character Mapping Codec koi8_t """ # http://ru.wikipedia.org/wiki/КОИ-8 # http://www.opensource.apple.com/source/libiconv/libiconv-4/libiconv/tests/KOI8-T.TXT import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.charmap_decode(input,self.errors,decoding_table)[0] class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='koi8-t', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) ### Decoding Table decoding_table = ( '\x00' # 0x00 -> NULL '\x01' # 0x01 -> START OF HEADING '\x02' # 0x02 -> START OF TEXT '\x03' # 0x03 -> END OF TEXT '\x04' # 0x04 -> END OF TRANSMISSION '\x05' # 0x05 -> ENQUIRY '\x06' # 0x06 -> ACKNOWLEDGE '\x07' # 0x07 -> BELL '\x08' # 0x08 -> BACKSPACE '\t' # 0x09 -> HORIZONTAL TABULATION '\n' # 0x0A -> LINE FEED '\x0b' # 0x0B -> VERTICAL TABULATION '\x0c' # 0x0C -> FORM FEED '\r' # 0x0D -> CARRIAGE RETURN '\x0e' # 0x0E -> SHIFT OUT '\x0f' # 0x0F -> SHIFT IN '\x10' # 0x10 -> DATA LINK ESCAPE '\x11' # 0x11 -> DEVICE CONTROL ONE '\x12' # 0x12 -> DEVICE CONTROL TWO '\x13' # 0x13 -> DEVICE CONTROL THREE '\x14' # 0x14 -> DEVICE CONTROL FOUR '\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE '\x16' # 0x16 -> SYNCHRONOUS IDLE '\x17' # 0x17 -> END OF TRANSMISSION BLOCK '\x18' # 0x18 -> CANCEL '\x19' # 0x19 -> END OF MEDIUM '\x1a' # 0x1A -> SUBSTITUTE '\x1b' # 0x1B -> ESCAPE '\x1c' # 0x1C -> FILE SEPARATOR '\x1d' # 0x1D -> GROUP SEPARATOR '\x1e' # 0x1E -> RECORD SEPARATOR '\x1f' # 0x1F -> UNIT SEPARATOR ' ' # 0x20 -> SPACE '!' # 0x21 -> EXCLAMATION MARK '"' # 0x22 -> QUOTATION MARK '#' # 0x23 -> NUMBER SIGN '$' # 0x24 -> DOLLAR SIGN '%' # 0x25 -> PERCENT SIGN '&' # 0x26 -> AMPERSAND "'" # 0x27 -> APOSTROPHE '(' # 0x28 -> LEFT PARENTHESIS ')' # 0x29 -> RIGHT PARENTHESIS '*' # 0x2A -> ASTERISK '+' # 0x2B -> PLUS SIGN ',' # 0x2C -> COMMA '-' # 0x2D -> HYPHEN-MINUS '.' # 0x2E -> FULL STOP '/' # 0x2F -> SOLIDUS '0' # 0x30 -> DIGIT ZERO '1' # 0x31 -> DIGIT ONE '2' # 0x32 -> DIGIT TWO '3' # 0x33 -> DIGIT THREE '4' # 0x34 -> DIGIT FOUR '5' # 0x35 -> DIGIT FIVE '6' # 0x36 -> DIGIT SIX '7' # 0x37 -> DIGIT SEVEN '8' # 0x38 -> DIGIT EIGHT '9' # 0x39 -> DIGIT NINE ':' # 0x3A -> COLON ';' # 0x3B -> SEMICOLON '<' # 0x3C -> LESS-THAN SIGN '=' # 0x3D -> EQUALS SIGN '>' # 0x3E -> GREATER-THAN SIGN '?' # 0x3F -> QUESTION MARK '@' # 0x40 -> COMMERCIAL AT 'A' # 0x41 -> LATIN CAPITAL LETTER A 'B' # 0x42 -> LATIN CAPITAL LETTER B 'C' # 0x43 -> LATIN CAPITAL LETTER C 'D' # 0x44 -> LATIN CAPITAL LETTER D 'E' # 0x45 -> LATIN CAPITAL LETTER E 'F' # 0x46 -> LATIN CAPITAL LETTER F 'G' # 0x47 -> LATIN CAPITAL LETTER G 'H' # 0x48 -> LATIN CAPITAL LETTER H 'I' # 0x49 -> LATIN CAPITAL LETTER I 'J' # 0x4A -> LATIN CAPITAL LETTER J 'K' # 0x4B -> LATIN CAPITAL LETTER K 'L' # 0x4C -> LATIN CAPITAL LETTER L 'M' # 0x4D -> LATIN CAPITAL LETTER M 'N' # 0x4E -> LATIN CAPITAL LETTER N 'O' # 0x4F -> LATIN CAPITAL LETTER O 'P' # 0x50 -> LATIN CAPITAL LETTER P 'Q' # 0x51 -> LATIN CAPITAL LETTER Q 'R' # 0x52 -> LATIN CAPITAL LETTER R 'S' # 0x53 -> LATIN CAPITAL LETTER S 'T' # 0x54 -> LATIN CAPITAL LETTER T 'U' # 0x55 -> LATIN CAPITAL LETTER U 'V' # 0x56 -> LATIN CAPITAL LETTER V 'W' # 0x57 -> LATIN CAPITAL LETTER W 'X' # 0x58 -> LATIN CAPITAL LETTER X 'Y' # 0x59 -> LATIN CAPITAL LETTER Y 'Z' # 0x5A -> LATIN CAPITAL LETTER Z '[' # 0x5B -> LEFT SQUARE BRACKET '\\' # 0x5C -> REVERSE SOLIDUS ']' # 0x5D -> RIGHT SQUARE BRACKET '^' # 0x5E -> CIRCUMFLEX ACCENT '_' # 0x5F -> LOW LINE '`' # 0x60 -> GRAVE ACCENT 'a' # 0x61 -> LATIN SMALL LETTER A 'b' # 0x62 -> LATIN SMALL LETTER B 'c' # 0x63 -> LATIN SMALL LETTER C 'd' # 0x64 -> LATIN SMALL LETTER D 'e' # 0x65 -> LATIN SMALL LETTER E 'f' # 0x66 -> LATIN SMALL LETTER F 'g' # 0x67 -> LATIN SMALL LETTER G 'h' # 0x68 -> LATIN SMALL LETTER H 'i' # 0x69 -> LATIN SMALL LETTER I 'j' # 0x6A -> LATIN SMALL LETTER J 'k' # 0x6B -> LATIN SMALL LETTER K 'l' # 0x6C -> LATIN SMALL LETTER L 'm' # 0x6D -> LATIN SMALL LETTER M 'n' # 0x6E -> LATIN SMALL LETTER N 'o' # 0x6F -> LATIN SMALL LETTER O 'p' # 0x70 -> LATIN SMALL LETTER P 'q' # 0x71 -> LATIN SMALL LETTER Q 'r' # 0x72 -> LATIN SMALL LETTER R 's' # 0x73 -> LATIN SMALL LETTER S 't' # 0x74 -> LATIN SMALL LETTER T 'u' # 0x75 -> LATIN SMALL LETTER U 'v' # 0x76 -> LATIN SMALL LETTER V 'w' # 0x77 -> LATIN SMALL LETTER W 'x' # 0x78 -> LATIN SMALL LETTER X 'y' # 0x79 -> LATIN SMALL LETTER Y 'z' # 0x7A -> LATIN SMALL LETTER Z '{' # 0x7B -> LEFT CURLY BRACKET '|' # 0x7C -> VERTICAL LINE '}' # 0x7D -> RIGHT CURLY BRACKET '~' # 0x7E -> TILDE '\x7f' # 0x7F -> DELETE '\u049b' # 0x80 -> CYRILLIC SMALL LETTER KA WITH DESCENDER '\u0493' # 0x81 -> CYRILLIC SMALL LETTER GHE WITH STROKE '\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK '\u0492' # 0x83 -> CYRILLIC CAPITAL LETTER GHE WITH STROKE '\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK '\u2026' # 0x85 -> HORIZONTAL ELLIPSIS '\u2020' # 0x86 -> DAGGER '\u2021' # 0x87 -> DOUBLE DAGGER '\ufffe' # 0x88 -> UNDEFINED '\u2030' # 0x89 -> PER MILLE SIGN '\u04b3' # 0x8A -> CYRILLIC SMALL LETTER HA WITH DESCENDER '\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK '\u04b2' # 0x8C -> CYRILLIC CAPITAL LETTER HA WITH DESCENDER '\u04b7' # 0x8D -> CYRILLIC SMALL LETTER CHE WITH DESCENDER '\u04b6' # 0x8E -> CYRILLIC CAPITAL LETTER CHE WITH DESCENDER '\ufffe' # 0x8F -> UNDEFINED '\u049a' # 0x90 -> CYRILLIC CAPITAL LETTER KA WITH DESCENDER '\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK '\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK '\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK '\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK '\u2022' # 0x95 -> BULLET '\u2013' # 0x96 -> EN DASH '\u2014' # 0x97 -> EM DASH '\ufffe' # 0x98 -> UNDEFINED '\u2122' # 0x99 -> TRADE MARK SIGN '\ufffe' # 0x9A -> UNDEFINED '\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK '\ufffe' # 0x9C -> UNDEFINED '\ufffe' # 0x9D -> UNDEFINED '\ufffe' # 0x9E -> UNDEFINED '\ufffe' # 0x9F -> UNDEFINED '\ufffe' # 0xA0 -> UNDEFINED '\u04ef' # 0xA1 -> CYRILLIC SMALL LETTER U WITH MACRON '\u04ee' # 0xA2 -> CYRILLIC CAPITAL LETTER U WITH MACRON '\u0451' # 0xA3 -> CYRILLIC SMALL LETTER IO '\xa4' # 0xA4 -> CURRENCY SIGN '\u04e3' # 0xA5 -> CYRILLIC SMALL LETTER I WITH MACRON '\xa6' # 0xA6 -> BROKEN BAR '\xa7' # 0xA7 -> SECTION SIGN '\ufffe' # 0xA8 -> UNDEFINED '\ufffe' # 0xA9 -> UNDEFINED '\ufffe' # 0xAA -> UNDEFINED '\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK '\xac' # 0xAC -> NOT SIGN '\xad' # 0xAD -> SOFT HYPHEN '\xae' # 0xAE -> REGISTERED SIGN '\ufffe' # 0xAF -> UNDEFINED '\xb0' # 0xB0 -> DEGREE SIGN '\xb1' # 0xB1 -> PLUS-MINUS SIGN '\xb2' # 0xB2 -> SUPERSCRIPT TWO '\u0401' # 0xB3 -> CYRILLIC CAPITAL LETTER IO '\ufffe' # 0xB4 -> UNDEFINED '\u04e2' # 0xB5 -> CYRILLIC CAPITAL LETTER I WITH MACRON '\xb6' # 0xB6 -> PILCROW SIGN '\xb7' # 0xB7 -> MIDDLE DOT '\ufffe' # 0xB8 -> UNDEFINED '\u2116' # 0xB9 -> NUMERO SIGN '\ufffe' # 0xBA -> UNDEFINED '\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK '\ufffe' # 0xBC -> UNDEFINED '\ufffe' # 0xBD -> UNDEFINED '\ufffe' # 0xBE -> UNDEFINED '\xa9' # 0xBF -> COPYRIGHT SIGN '\u044e' # 0xC0 -> CYRILLIC SMALL LETTER YU '\u0430' # 0xC1 -> CYRILLIC SMALL LETTER A '\u0431' # 0xC2 -> CYRILLIC SMALL LETTER BE '\u0446' # 0xC3 -> CYRILLIC SMALL LETTER TSE '\u0434' # 0xC4 -> CYRILLIC SMALL LETTER DE '\u0435' # 0xC5 -> CYRILLIC SMALL LETTER IE '\u0444' # 0xC6 -> CYRILLIC SMALL LETTER EF '\u0433' # 0xC7 -> CYRILLIC SMALL LETTER GHE '\u0445' # 0xC8 -> CYRILLIC SMALL LETTER HA '\u0438' # 0xC9 -> CYRILLIC SMALL LETTER I '\u0439' # 0xCA -> CYRILLIC SMALL LETTER SHORT I '\u043a' # 0xCB -> CYRILLIC SMALL LETTER KA '\u043b' # 0xCC -> CYRILLIC SMALL LETTER EL '\u043c' # 0xCD -> CYRILLIC SMALL LETTER EM '\u043d' # 0xCE -> CYRILLIC SMALL LETTER EN '\u043e' # 0xCF -> CYRILLIC SMALL LETTER O '\u043f' # 0xD0 -> CYRILLIC SMALL LETTER PE '\u044f' # 0xD1 -> CYRILLIC SMALL LETTER YA '\u0440' # 0xD2 -> CYRILLIC SMALL LETTER ER '\u0441' # 0xD3 -> CYRILLIC SMALL LETTER ES '\u0442' # 0xD4 -> CYRILLIC SMALL LETTER TE '\u0443' # 0xD5 -> CYRILLIC SMALL LETTER U '\u0436' # 0xD6 -> CYRILLIC SMALL LETTER ZHE '\u0432' # 0xD7 -> CYRILLIC SMALL LETTER VE '\u044c' # 0xD8 -> CYRILLIC SMALL LETTER SOFT SIGN '\u044b' # 0xD9 -> CYRILLIC SMALL LETTER YERU '\u0437' # 0xDA -> CYRILLIC SMALL LETTER ZE '\u0448' # 0xDB -> CYRILLIC SMALL LETTER SHA '\u044d' # 0xDC -> CYRILLIC SMALL LETTER E '\u0449' # 0xDD -> CYRILLIC SMALL LETTER SHCHA '\u0447' # 0xDE -> CYRILLIC SMALL LETTER CHE '\u044a' # 0xDF -> CYRILLIC SMALL LETTER HARD SIGN '\u042e' # 0xE0 -> CYRILLIC CAPITAL LETTER YU '\u0410' # 0xE1 -> CYRILLIC CAPITAL LETTER A '\u0411' # 0xE2 -> CYRILLIC CAPITAL LETTER BE '\u0426' # 0xE3 -> CYRILLIC CAPITAL LETTER TSE '\u0414' # 0xE4 -> CYRILLIC CAPITAL LETTER DE '\u0415' # 0xE5 -> CYRILLIC CAPITAL LETTER IE '\u0424' # 0xE6 -> CYRILLIC CAPITAL LETTER EF '\u0413' # 0xE7 -> CYRILLIC CAPITAL LETTER GHE '\u0425' # 0xE8 -> CYRILLIC CAPITAL LETTER HA '\u0418' # 0xE9 -> CYRILLIC CAPITAL LETTER I '\u0419' # 0xEA -> CYRILLIC CAPITAL LETTER SHORT I '\u041a' # 0xEB -> CYRILLIC CAPITAL LETTER KA '\u041b' # 0xEC -> CYRILLIC CAPITAL LETTER EL '\u041c' # 0xED -> CYRILLIC CAPITAL LETTER EM '\u041d' # 0xEE -> CYRILLIC CAPITAL LETTER EN '\u041e' # 0xEF -> CYRILLIC CAPITAL LETTER O '\u041f' # 0xF0 -> CYRILLIC CAPITAL LETTER PE '\u042f' # 0xF1 -> CYRILLIC CAPITAL LETTER YA '\u0420' # 0xF2 -> CYRILLIC CAPITAL LETTER ER '\u0421' # 0xF3 -> CYRILLIC CAPITAL LETTER ES '\u0422' # 0xF4 -> CYRILLIC CAPITAL LETTER TE '\u0423' # 0xF5 -> CYRILLIC CAPITAL LETTER U '\u0416' # 0xF6 -> CYRILLIC CAPITAL LETTER ZHE '\u0412' # 0xF7 -> CYRILLIC CAPITAL LETTER VE '\u042c' # 0xF8 -> CYRILLIC CAPITAL LETTER SOFT SIGN '\u042b' # 0xF9 -> CYRILLIC CAPITAL LETTER YERU '\u0417' # 0xFA -> CYRILLIC CAPITAL LETTER ZE '\u0428' # 0xFB -> CYRILLIC CAPITAL LETTER SHA '\u042d' # 0xFC -> CYRILLIC CAPITAL LETTER E '\u0429' # 0xFD -> CYRILLIC CAPITAL LETTER SHCHA '\u0427' # 0xFE -> CYRILLIC CAPITAL LETTER CHE '\u042a' # 0xFF -> CYRILLIC CAPITAL LETTER HARD SIGN ) ### Encoding table encoding_table=codecs.charmap_build(decoding_table) cp866.py 0000644 00000103534 15053241622 0005772 0 ustar 00 """ Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP866.TXT' with gencodec.py. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.charmap_encode(input,self.errors,encoding_map)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.charmap_decode(input,self.errors,decoding_table)[0] class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='cp866', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A 0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE 0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE 0x0083: 0x0413, # CYRILLIC CAPITAL LETTER GHE 0x0084: 0x0414, # CYRILLIC CAPITAL LETTER DE 0x0085: 0x0415, # CYRILLIC CAPITAL LETTER IE 0x0086: 0x0416, # CYRILLIC CAPITAL LETTER ZHE 0x0087: 0x0417, # CYRILLIC CAPITAL LETTER ZE 0x0088: 0x0418, # CYRILLIC CAPITAL LETTER I 0x0089: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I 0x008a: 0x041a, # CYRILLIC CAPITAL LETTER KA 0x008b: 0x041b, # CYRILLIC CAPITAL LETTER EL 0x008c: 0x041c, # CYRILLIC CAPITAL LETTER EM 0x008d: 0x041d, # CYRILLIC CAPITAL LETTER EN 0x008e: 0x041e, # CYRILLIC CAPITAL LETTER O 0x008f: 0x041f, # CYRILLIC CAPITAL LETTER PE 0x0090: 0x0420, # CYRILLIC CAPITAL LETTER ER 0x0091: 0x0421, # CYRILLIC CAPITAL LETTER ES 0x0092: 0x0422, # CYRILLIC CAPITAL LETTER TE 0x0093: 0x0423, # CYRILLIC CAPITAL LETTER U 0x0094: 0x0424, # CYRILLIC CAPITAL LETTER EF 0x0095: 0x0425, # CYRILLIC CAPITAL LETTER HA 0x0096: 0x0426, # CYRILLIC CAPITAL LETTER TSE 0x0097: 0x0427, # CYRILLIC CAPITAL LETTER CHE 0x0098: 0x0428, # CYRILLIC CAPITAL LETTER SHA 0x0099: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA 0x009a: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN 0x009b: 0x042b, # CYRILLIC CAPITAL LETTER YERU 0x009c: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN 0x009d: 0x042d, # CYRILLIC CAPITAL LETTER E 0x009e: 0x042e, # CYRILLIC CAPITAL LETTER YU 0x009f: 0x042f, # CYRILLIC CAPITAL LETTER YA 0x00a0: 0x0430, # CYRILLIC SMALL LETTER A 0x00a1: 0x0431, # CYRILLIC SMALL LETTER BE 0x00a2: 0x0432, # CYRILLIC SMALL LETTER VE 0x00a3: 0x0433, # CYRILLIC SMALL LETTER GHE 0x00a4: 0x0434, # CYRILLIC SMALL LETTER DE 0x00a5: 0x0435, # CYRILLIC SMALL LETTER IE 0x00a6: 0x0436, # CYRILLIC SMALL LETTER ZHE 0x00a7: 0x0437, # CYRILLIC SMALL LETTER ZE 0x00a8: 0x0438, # CYRILLIC SMALL LETTER I 0x00a9: 0x0439, # CYRILLIC SMALL LETTER SHORT I 0x00aa: 0x043a, # CYRILLIC SMALL LETTER KA 0x00ab: 0x043b, # CYRILLIC SMALL LETTER EL 0x00ac: 0x043c, # CYRILLIC SMALL LETTER EM 0x00ad: 0x043d, # CYRILLIC SMALL LETTER EN 0x00ae: 0x043e, # CYRILLIC SMALL LETTER O 0x00af: 0x043f, # CYRILLIC SMALL LETTER PE 0x00b0: 0x2591, # LIGHT SHADE 0x00b1: 0x2592, # MEDIUM SHADE 0x00b2: 0x2593, # DARK SHADE 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT 0x00db: 0x2588, # FULL BLOCK 0x00dc: 0x2584, # LOWER HALF BLOCK 0x00dd: 0x258c, # LEFT HALF BLOCK 0x00de: 0x2590, # RIGHT HALF BLOCK 0x00df: 0x2580, # UPPER HALF BLOCK 0x00e0: 0x0440, # CYRILLIC SMALL LETTER ER 0x00e1: 0x0441, # CYRILLIC SMALL LETTER ES 0x00e2: 0x0442, # CYRILLIC SMALL LETTER TE 0x00e3: 0x0443, # CYRILLIC SMALL LETTER U 0x00e4: 0x0444, # CYRILLIC SMALL LETTER EF 0x00e5: 0x0445, # CYRILLIC SMALL LETTER HA 0x00e6: 0x0446, # CYRILLIC SMALL LETTER TSE 0x00e7: 0x0447, # CYRILLIC SMALL LETTER CHE 0x00e8: 0x0448, # CYRILLIC SMALL LETTER SHA 0x00e9: 0x0449, # CYRILLIC SMALL LETTER SHCHA 0x00ea: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN 0x00eb: 0x044b, # CYRILLIC SMALL LETTER YERU 0x00ec: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN 0x00ed: 0x044d, # CYRILLIC SMALL LETTER E 0x00ee: 0x044e, # CYRILLIC SMALL LETTER YU 0x00ef: 0x044f, # CYRILLIC SMALL LETTER YA 0x00f0: 0x0401, # CYRILLIC CAPITAL LETTER IO 0x00f1: 0x0451, # CYRILLIC SMALL LETTER IO 0x00f2: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE 0x00f3: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE 0x00f4: 0x0407, # CYRILLIC CAPITAL LETTER YI 0x00f5: 0x0457, # CYRILLIC SMALL LETTER YI 0x00f6: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U 0x00f7: 0x045e, # CYRILLIC SMALL LETTER SHORT U 0x00f8: 0x00b0, # DEGREE SIGN 0x00f9: 0x2219, # BULLET OPERATOR 0x00fa: 0x00b7, # MIDDLE DOT 0x00fb: 0x221a, # SQUARE ROOT 0x00fc: 0x2116, # NUMERO SIGN 0x00fd: 0x00a4, # CURRENCY SIGN 0x00fe: 0x25a0, # BLACK SQUARE 0x00ff: 0x00a0, # NO-BREAK SPACE }) ### Decoding Table decoding_table = ( u'\x00' # 0x0000 -> NULL u'\x01' # 0x0001 -> START OF HEADING u'\x02' # 0x0002 -> START OF TEXT u'\x03' # 0x0003 -> END OF TEXT u'\x04' # 0x0004 -> END OF TRANSMISSION u'\x05' # 0x0005 -> ENQUIRY u'\x06' # 0x0006 -> ACKNOWLEDGE u'\x07' # 0x0007 -> BELL u'\x08' # 0x0008 -> BACKSPACE u'\t' # 0x0009 -> HORIZONTAL TABULATION u'\n' # 0x000a -> LINE FEED u'\x0b' # 0x000b -> VERTICAL TABULATION u'\x0c' # 0x000c -> FORM FEED u'\r' # 0x000d -> CARRIAGE RETURN u'\x0e' # 0x000e -> SHIFT OUT u'\x0f' # 0x000f -> SHIFT IN u'\x10' # 0x0010 -> DATA LINK ESCAPE u'\x11' # 0x0011 -> DEVICE CONTROL ONE u'\x12' # 0x0012 -> DEVICE CONTROL TWO u'\x13' # 0x0013 -> DEVICE CONTROL THREE u'\x14' # 0x0014 -> DEVICE CONTROL FOUR u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE u'\x16' # 0x0016 -> SYNCHRONOUS IDLE u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK u'\x18' # 0x0018 -> CANCEL u'\x19' # 0x0019 -> END OF MEDIUM u'\x1a' # 0x001a -> SUBSTITUTE u'\x1b' # 0x001b -> ESCAPE u'\x1c' # 0x001c -> FILE SEPARATOR u'\x1d' # 0x001d -> GROUP SEPARATOR u'\x1e' # 0x001e -> RECORD SEPARATOR u'\x1f' # 0x001f -> UNIT SEPARATOR u' ' # 0x0020 -> SPACE u'!' # 0x0021 -> EXCLAMATION MARK u'"' # 0x0022 -> QUOTATION MARK u'#' # 0x0023 -> NUMBER SIGN u'$' # 0x0024 -> DOLLAR SIGN u'%' # 0x0025 -> PERCENT SIGN u'&' # 0x0026 -> AMPERSAND u"'" # 0x0027 -> APOSTROPHE u'(' # 0x0028 -> LEFT PARENTHESIS u')' # 0x0029 -> RIGHT PARENTHESIS u'*' # 0x002a -> ASTERISK u'+' # 0x002b -> PLUS SIGN u',' # 0x002c -> COMMA u'-' # 0x002d -> HYPHEN-MINUS u'.' # 0x002e -> FULL STOP u'/' # 0x002f -> SOLIDUS u'0' # 0x0030 -> DIGIT ZERO u'1' # 0x0031 -> DIGIT ONE u'2' # 0x0032 -> DIGIT TWO u'3' # 0x0033 -> DIGIT THREE u'4' # 0x0034 -> DIGIT FOUR u'5' # 0x0035 -> DIGIT FIVE u'6' # 0x0036 -> DIGIT SIX u'7' # 0x0037 -> DIGIT SEVEN u'8' # 0x0038 -> DIGIT EIGHT u'9' # 0x0039 -> DIGIT NINE u':' # 0x003a -> COLON u';' # 0x003b -> SEMICOLON u'<' # 0x003c -> LESS-THAN SIGN u'=' # 0x003d -> EQUALS SIGN u'>' # 0x003e -> GREATER-THAN SIGN u'?' # 0x003f -> QUESTION MARK u'@' # 0x0040 -> COMMERCIAL AT u'A' # 0x0041 -> LATIN CAPITAL LETTER A u'B' # 0x0042 -> LATIN CAPITAL LETTER B u'C' # 0x0043 -> LATIN CAPITAL LETTER C u'D' # 0x0044 -> LATIN CAPITAL LETTER D u'E' # 0x0045 -> LATIN CAPITAL LETTER E u'F' # 0x0046 -> LATIN CAPITAL LETTER F u'G' # 0x0047 -> LATIN CAPITAL LETTER G u'H' # 0x0048 -> LATIN CAPITAL LETTER H u'I' # 0x0049 -> LATIN CAPITAL LETTER I u'J' # 0x004a -> LATIN CAPITAL LETTER J u'K' # 0x004b -> LATIN CAPITAL LETTER K u'L' # 0x004c -> LATIN CAPITAL LETTER L u'M' # 0x004d -> LATIN CAPITAL LETTER M u'N' # 0x004e -> LATIN CAPITAL LETTER N u'O' # 0x004f -> LATIN CAPITAL LETTER O u'P' # 0x0050 -> LATIN CAPITAL LETTER P u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q u'R' # 0x0052 -> LATIN CAPITAL LETTER R u'S' # 0x0053 -> LATIN CAPITAL LETTER S u'T' # 0x0054 -> LATIN CAPITAL LETTER T u'U' # 0x0055 -> LATIN CAPITAL LETTER U u'V' # 0x0056 -> LATIN CAPITAL LETTER V u'W' # 0x0057 -> LATIN CAPITAL LETTER W u'X' # 0x0058 -> LATIN CAPITAL LETTER X u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y u'Z' # 0x005a -> LATIN CAPITAL LETTER Z u'[' # 0x005b -> LEFT SQUARE BRACKET u'\\' # 0x005c -> REVERSE SOLIDUS u']' # 0x005d -> RIGHT SQUARE BRACKET u'^' # 0x005e -> CIRCUMFLEX ACCENT u'_' # 0x005f -> LOW LINE u'`' # 0x0060 -> GRAVE ACCENT u'a' # 0x0061 -> LATIN SMALL LETTER A u'b' # 0x0062 -> LATIN SMALL LETTER B u'c' # 0x0063 -> LATIN SMALL LETTER C u'd' # 0x0064 -> LATIN SMALL LETTER D u'e' # 0x0065 -> LATIN SMALL LETTER E u'f' # 0x0066 -> LATIN SMALL LETTER F u'g' # 0x0067 -> LATIN SMALL LETTER G u'h' # 0x0068 -> LATIN SMALL LETTER H u'i' # 0x0069 -> LATIN SMALL LETTER I u'j' # 0x006a -> LATIN SMALL LETTER J u'k' # 0x006b -> LATIN SMALL LETTER K u'l' # 0x006c -> LATIN SMALL LETTER L u'm' # 0x006d -> LATIN SMALL LETTER M u'n' # 0x006e -> LATIN SMALL LETTER N u'o' # 0x006f -> LATIN SMALL LETTER O u'p' # 0x0070 -> LATIN SMALL LETTER P u'q' # 0x0071 -> LATIN SMALL LETTER Q u'r' # 0x0072 -> LATIN SMALL LETTER R u's' # 0x0073 -> LATIN SMALL LETTER S u't' # 0x0074 -> LATIN SMALL LETTER T u'u' # 0x0075 -> LATIN SMALL LETTER U u'v' # 0x0076 -> LATIN SMALL LETTER V u'w' # 0x0077 -> LATIN SMALL LETTER W u'x' # 0x0078 -> LATIN SMALL LETTER X u'y' # 0x0079 -> LATIN SMALL LETTER Y u'z' # 0x007a -> LATIN SMALL LETTER Z u'{' # 0x007b -> LEFT CURLY BRACKET u'|' # 0x007c -> VERTICAL LINE u'}' # 0x007d -> RIGHT CURLY BRACKET u'~' # 0x007e -> TILDE u'\x7f' # 0x007f -> DELETE u'\u0410' # 0x0080 -> CYRILLIC CAPITAL LETTER A u'\u0411' # 0x0081 -> CYRILLIC CAPITAL LETTER BE u'\u0412' # 0x0082 -> CYRILLIC CAPITAL LETTER VE u'\u0413' # 0x0083 -> CYRILLIC CAPITAL LETTER GHE u'\u0414' # 0x0084 -> CYRILLIC CAPITAL LETTER DE u'\u0415' # 0x0085 -> CYRILLIC CAPITAL LETTER IE u'\u0416' # 0x0086 -> CYRILLIC CAPITAL LETTER ZHE u'\u0417' # 0x0087 -> CYRILLIC CAPITAL LETTER ZE u'\u0418' # 0x0088 -> CYRILLIC CAPITAL LETTER I u'\u0419' # 0x0089 -> CYRILLIC CAPITAL LETTER SHORT I u'\u041a' # 0x008a -> CYRILLIC CAPITAL LETTER KA u'\u041b' # 0x008b -> CYRILLIC CAPITAL LETTER EL u'\u041c' # 0x008c -> CYRILLIC CAPITAL LETTER EM u'\u041d' # 0x008d -> CYRILLIC CAPITAL LETTER EN u'\u041e' # 0x008e -> CYRILLIC CAPITAL LETTER O u'\u041f' # 0x008f -> CYRILLIC CAPITAL LETTER PE u'\u0420' # 0x0090 -> CYRILLIC CAPITAL LETTER ER u'\u0421' # 0x0091 -> CYRILLIC CAPITAL LETTER ES u'\u0422' # 0x0092 -> CYRILLIC CAPITAL LETTER TE u'\u0423' # 0x0093 -> CYRILLIC CAPITAL LETTER U u'\u0424' # 0x0094 -> CYRILLIC CAPITAL LETTER EF u'\u0425' # 0x0095 -> CYRILLIC CAPITAL LETTER HA u'\u0426' # 0x0096 -> CYRILLIC CAPITAL LETTER TSE u'\u0427' # 0x0097 -> CYRILLIC CAPITAL LETTER CHE u'\u0428' # 0x0098 -> CYRILLIC CAPITAL LETTER SHA u'\u0429' # 0x0099 -> CYRILLIC CAPITAL LETTER SHCHA u'\u042a' # 0x009a -> CYRILLIC CAPITAL LETTER HARD SIGN u'\u042b' # 0x009b -> CYRILLIC CAPITAL LETTER YERU u'\u042c' # 0x009c -> CYRILLIC CAPITAL LETTER SOFT SIGN u'\u042d' # 0x009d -> CYRILLIC CAPITAL LETTER E u'\u042e' # 0x009e -> CYRILLIC CAPITAL LETTER YU u'\u042f' # 0x009f -> CYRILLIC CAPITAL LETTER YA u'\u0430' # 0x00a0 -> CYRILLIC SMALL LETTER A u'\u0431' # 0x00a1 -> CYRILLIC SMALL LETTER BE u'\u0432' # 0x00a2 -> CYRILLIC SMALL LETTER VE u'\u0433' # 0x00a3 -> CYRILLIC SMALL LETTER GHE u'\u0434' # 0x00a4 -> CYRILLIC SMALL LETTER DE u'\u0435' # 0x00a5 -> CYRILLIC SMALL LETTER IE u'\u0436' # 0x00a6 -> CYRILLIC SMALL LETTER ZHE u'\u0437' # 0x00a7 -> CYRILLIC SMALL LETTER ZE u'\u0438' # 0x00a8 -> CYRILLIC SMALL LETTER I u'\u0439' # 0x00a9 -> CYRILLIC SMALL LETTER SHORT I u'\u043a' # 0x00aa -> CYRILLIC SMALL LETTER KA u'\u043b' # 0x00ab -> CYRILLIC SMALL LETTER EL u'\u043c' # 0x00ac -> CYRILLIC SMALL LETTER EM u'\u043d' # 0x00ad -> CYRILLIC SMALL LETTER EN u'\u043e' # 0x00ae -> CYRILLIC SMALL LETTER O u'\u043f' # 0x00af -> CYRILLIC SMALL LETTER PE u'\u2591' # 0x00b0 -> LIGHT SHADE u'\u2592' # 0x00b1 -> MEDIUM SHADE u'\u2593' # 0x00b2 -> DARK SHADE u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT u'\u2588' # 0x00db -> FULL BLOCK u'\u2584' # 0x00dc -> LOWER HALF BLOCK u'\u258c' # 0x00dd -> LEFT HALF BLOCK u'\u2590' # 0x00de -> RIGHT HALF BLOCK u'\u2580' # 0x00df -> UPPER HALF BLOCK u'\u0440' # 0x00e0 -> CYRILLIC SMALL LETTER ER u'\u0441' # 0x00e1 -> CYRILLIC SMALL LETTER ES u'\u0442' # 0x00e2 -> CYRILLIC SMALL LETTER TE u'\u0443' # 0x00e3 -> CYRILLIC SMALL LETTER U u'\u0444' # 0x00e4 -> CYRILLIC SMALL LETTER EF u'\u0445' # 0x00e5 -> CYRILLIC SMALL LETTER HA u'\u0446' # 0x00e6 -> CYRILLIC SMALL LETTER TSE u'\u0447' # 0x00e7 -> CYRILLIC SMALL LETTER CHE u'\u0448' # 0x00e8 -> CYRILLIC SMALL LETTER SHA u'\u0449' # 0x00e9 -> CYRILLIC SMALL LETTER SHCHA u'\u044a' # 0x00ea -> CYRILLIC SMALL LETTER HARD SIGN u'\u044b' # 0x00eb -> CYRILLIC SMALL LETTER YERU u'\u044c' # 0x00ec -> CYRILLIC SMALL LETTER SOFT SIGN u'\u044d' # 0x00ed -> CYRILLIC SMALL LETTER E u'\u044e' # 0x00ee -> CYRILLIC SMALL LETTER YU u'\u044f' # 0x00ef -> CYRILLIC SMALL LETTER YA u'\u0401' # 0x00f0 -> CYRILLIC CAPITAL LETTER IO u'\u0451' # 0x00f1 -> CYRILLIC SMALL LETTER IO u'\u0404' # 0x00f2 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE u'\u0454' # 0x00f3 -> CYRILLIC SMALL LETTER UKRAINIAN IE u'\u0407' # 0x00f4 -> CYRILLIC CAPITAL LETTER YI u'\u0457' # 0x00f5 -> CYRILLIC SMALL LETTER YI u'\u040e' # 0x00f6 -> CYRILLIC CAPITAL LETTER SHORT U u'\u045e' # 0x00f7 -> CYRILLIC SMALL LETTER SHORT U u'\xb0' # 0x00f8 -> DEGREE SIGN u'\u2219' # 0x00f9 -> BULLET OPERATOR u'\xb7' # 0x00fa -> MIDDLE DOT u'\u221a' # 0x00fb -> SQUARE ROOT u'\u2116' # 0x00fc -> NUMERO SIGN u'\xa4' # 0x00fd -> CURRENCY SIGN u'\u25a0' # 0x00fe -> BLACK SQUARE u'\xa0' # 0x00ff -> NO-BREAK SPACE ) ### Encoding Map encoding_map = { 0x0000: 0x0000, # NULL 0x0001: 0x0001, # START OF HEADING 0x0002: 0x0002, # START OF TEXT 0x0003: 0x0003, # END OF TEXT 0x0004: 0x0004, # END OF TRANSMISSION 0x0005: 0x0005, # ENQUIRY 0x0006: 0x0006, # ACKNOWLEDGE 0x0007: 0x0007, # BELL 0x0008: 0x0008, # BACKSPACE 0x0009: 0x0009, # HORIZONTAL TABULATION 0x000a: 0x000a, # LINE FEED 0x000b: 0x000b, # VERTICAL TABULATION 0x000c: 0x000c, # FORM FEED 0x000d: 0x000d, # CARRIAGE RETURN 0x000e: 0x000e, # SHIFT OUT 0x000f: 0x000f, # SHIFT IN 0x0010: 0x0010, # DATA LINK ESCAPE 0x0011: 0x0011, # DEVICE CONTROL ONE 0x0012: 0x0012, # DEVICE CONTROL TWO 0x0013: 0x0013, # DEVICE CONTROL THREE 0x0014: 0x0014, # DEVICE CONTROL FOUR 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE 0x0016: 0x0016, # SYNCHRONOUS IDLE 0x0017: 0x0017, # END OF TRANSMISSION BLOCK 0x0018: 0x0018, # CANCEL 0x0019: 0x0019, # END OF MEDIUM 0x001a: 0x001a, # SUBSTITUTE 0x001b: 0x001b, # ESCAPE 0x001c: 0x001c, # FILE SEPARATOR 0x001d: 0x001d, # GROUP SEPARATOR 0x001e: 0x001e, # RECORD SEPARATOR 0x001f: 0x001f, # UNIT SEPARATOR 0x0020: 0x0020, # SPACE 0x0021: 0x0021, # EXCLAMATION MARK 0x0022: 0x0022, # QUOTATION MARK 0x0023: 0x0023, # NUMBER SIGN 0x0024: 0x0024, # DOLLAR SIGN 0x0025: 0x0025, # PERCENT SIGN 0x0026: 0x0026, # AMPERSAND 0x0027: 0x0027, # APOSTROPHE 0x0028: 0x0028, # LEFT PARENTHESIS 0x0029: 0x0029, # RIGHT PARENTHESIS 0x002a: 0x002a, # ASTERISK 0x002b: 0x002b, # PLUS SIGN 0x002c: 0x002c, # COMMA 0x002d: 0x002d, # HYPHEN-MINUS 0x002e: 0x002e, # FULL STOP 0x002f: 0x002f, # SOLIDUS 0x0030: 0x0030, # DIGIT ZERO 0x0031: 0x0031, # DIGIT ONE 0x0032: 0x0032, # DIGIT TWO 0x0033: 0x0033, # DIGIT THREE 0x0034: 0x0034, # DIGIT FOUR 0x0035: 0x0035, # DIGIT FIVE 0x0036: 0x0036, # DIGIT SIX 0x0037: 0x0037, # DIGIT SEVEN 0x0038: 0x0038, # DIGIT EIGHT 0x0039: 0x0039, # DIGIT NINE 0x003a: 0x003a, # COLON 0x003b: 0x003b, # SEMICOLON 0x003c: 0x003c, # LESS-THAN SIGN 0x003d: 0x003d, # EQUALS SIGN 0x003e: 0x003e, # GREATER-THAN SIGN 0x003f: 0x003f, # QUESTION MARK 0x0040: 0x0040, # COMMERCIAL AT 0x0041: 0x0041, # LATIN CAPITAL LETTER A 0x0042: 0x0042, # LATIN CAPITAL LETTER B 0x0043: 0x0043, # LATIN CAPITAL LETTER C 0x0044: 0x0044, # LATIN CAPITAL LETTER D 0x0045: 0x0045, # LATIN CAPITAL LETTER E 0x0046: 0x0046, # LATIN CAPITAL LETTER F 0x0047: 0x0047, # LATIN CAPITAL LETTER G 0x0048: 0x0048, # LATIN CAPITAL LETTER H 0x0049: 0x0049, # LATIN CAPITAL LETTER I 0x004a: 0x004a, # LATIN CAPITAL LETTER J 0x004b: 0x004b, # LATIN CAPITAL LETTER K 0x004c: 0x004c, # LATIN CAPITAL LETTER L 0x004d: 0x004d, # LATIN CAPITAL LETTER M 0x004e: 0x004e, # LATIN CAPITAL LETTER N 0x004f: 0x004f, # LATIN CAPITAL LETTER O 0x0050: 0x0050, # LATIN CAPITAL LETTER P 0x0051: 0x0051, # LATIN CAPITAL LETTER Q 0x0052: 0x0052, # LATIN CAPITAL LETTER R 0x0053: 0x0053, # LATIN CAPITAL LETTER S 0x0054: 0x0054, # LATIN CAPITAL LETTER T 0x0055: 0x0055, # LATIN CAPITAL LETTER U 0x0056: 0x0056, # LATIN CAPITAL LETTER V 0x0057: 0x0057, # LATIN CAPITAL LETTER W 0x0058: 0x0058, # LATIN CAPITAL LETTER X 0x0059: 0x0059, # LATIN CAPITAL LETTER Y 0x005a: 0x005a, # LATIN CAPITAL LETTER Z 0x005b: 0x005b, # LEFT SQUARE BRACKET 0x005c: 0x005c, # REVERSE SOLIDUS 0x005d: 0x005d, # RIGHT SQUARE BRACKET 0x005e: 0x005e, # CIRCUMFLEX ACCENT 0x005f: 0x005f, # LOW LINE 0x0060: 0x0060, # GRAVE ACCENT 0x0061: 0x0061, # LATIN SMALL LETTER A 0x0062: 0x0062, # LATIN SMALL LETTER B 0x0063: 0x0063, # LATIN SMALL LETTER C 0x0064: 0x0064, # LATIN SMALL LETTER D 0x0065: 0x0065, # LATIN SMALL LETTER E 0x0066: 0x0066, # LATIN SMALL LETTER F 0x0067: 0x0067, # LATIN SMALL LETTER G 0x0068: 0x0068, # LATIN SMALL LETTER H 0x0069: 0x0069, # LATIN SMALL LETTER I 0x006a: 0x006a, # LATIN SMALL LETTER J 0x006b: 0x006b, # LATIN SMALL LETTER K 0x006c: 0x006c, # LATIN SMALL LETTER L 0x006d: 0x006d, # LATIN SMALL LETTER M 0x006e: 0x006e, # LATIN SMALL LETTER N 0x006f: 0x006f, # LATIN SMALL LETTER O 0x0070: 0x0070, # LATIN SMALL LETTER P 0x0071: 0x0071, # LATIN SMALL LETTER Q 0x0072: 0x0072, # LATIN SMALL LETTER R 0x0073: 0x0073, # LATIN SMALL LETTER S 0x0074: 0x0074, # LATIN SMALL LETTER T 0x0075: 0x0075, # LATIN SMALL LETTER U 0x0076: 0x0076, # LATIN SMALL LETTER V 0x0077: 0x0077, # LATIN SMALL LETTER W 0x0078: 0x0078, # LATIN SMALL LETTER X 0x0079: 0x0079, # LATIN SMALL LETTER Y 0x007a: 0x007a, # LATIN SMALL LETTER Z 0x007b: 0x007b, # LEFT CURLY BRACKET 0x007c: 0x007c, # VERTICAL LINE 0x007d: 0x007d, # RIGHT CURLY BRACKET 0x007e: 0x007e, # TILDE 0x007f: 0x007f, # DELETE 0x00a0: 0x00ff, # NO-BREAK SPACE 0x00a4: 0x00fd, # CURRENCY SIGN 0x00b0: 0x00f8, # DEGREE SIGN 0x00b7: 0x00fa, # MIDDLE DOT 0x0401: 0x00f0, # CYRILLIC CAPITAL LETTER IO 0x0404: 0x00f2, # CYRILLIC CAPITAL LETTER UKRAINIAN IE 0x0407: 0x00f4, # CYRILLIC CAPITAL LETTER YI 0x040e: 0x00f6, # CYRILLIC CAPITAL LETTER SHORT U 0x0410: 0x0080, # CYRILLIC CAPITAL LETTER A 0x0411: 0x0081, # CYRILLIC CAPITAL LETTER BE 0x0412: 0x0082, # CYRILLIC CAPITAL LETTER VE 0x0413: 0x0083, # CYRILLIC CAPITAL LETTER GHE 0x0414: 0x0084, # CYRILLIC CAPITAL LETTER DE 0x0415: 0x0085, # CYRILLIC CAPITAL LETTER IE 0x0416: 0x0086, # CYRILLIC CAPITAL LETTER ZHE 0x0417: 0x0087, # CYRILLIC CAPITAL LETTER ZE 0x0418: 0x0088, # CYRILLIC CAPITAL LETTER I 0x0419: 0x0089, # CYRILLIC CAPITAL LETTER SHORT I 0x041a: 0x008a, # CYRILLIC CAPITAL LETTER KA 0x041b: 0x008b, # CYRILLIC CAPITAL LETTER EL 0x041c: 0x008c, # CYRILLIC CAPITAL LETTER EM 0x041d: 0x008d, # CYRILLIC CAPITAL LETTER EN 0x041e: 0x008e, # CYRILLIC CAPITAL LETTER O 0x041f: 0x008f, # CYRILLIC CAPITAL LETTER PE 0x0420: 0x0090, # CYRILLIC CAPITAL LETTER ER 0x0421: 0x0091, # CYRILLIC CAPITAL LETTER ES 0x0422: 0x0092, # CYRILLIC CAPITAL LETTER TE 0x0423: 0x0093, # CYRILLIC CAPITAL LETTER U 0x0424: 0x0094, # CYRILLIC CAPITAL LETTER EF 0x0425: 0x0095, # CYRILLIC CAPITAL LETTER HA 0x0426: 0x0096, # CYRILLIC CAPITAL LETTER TSE 0x0427: 0x0097, # CYRILLIC CAPITAL LETTER CHE 0x0428: 0x0098, # CYRILLIC CAPITAL LETTER SHA 0x0429: 0x0099, # CYRILLIC CAPITAL LETTER SHCHA 0x042a: 0x009a, # CYRILLIC CAPITAL LETTER HARD SIGN 0x042b: 0x009b, # CYRILLIC CAPITAL LETTER YERU 0x042c: 0x009c, # CYRILLIC CAPITAL LETTER SOFT SIGN 0x042d: 0x009d, # CYRILLIC CAPITAL LETTER E 0x042e: 0x009e, # CYRILLIC CAPITAL LETTER YU 0x042f: 0x009f, # CYRILLIC CAPITAL LETTER YA 0x0430: 0x00a0, # CYRILLIC SMALL LETTER A 0x0431: 0x00a1, # CYRILLIC SMALL LETTER BE 0x0432: 0x00a2, # CYRILLIC SMALL LETTER VE 0x0433: 0x00a3, # CYRILLIC SMALL LETTER GHE 0x0434: 0x00a4, # CYRILLIC SMALL LETTER DE 0x0435: 0x00a5, # CYRILLIC SMALL LETTER IE 0x0436: 0x00a6, # CYRILLIC SMALL LETTER ZHE 0x0437: 0x00a7, # CYRILLIC SMALL LETTER ZE 0x0438: 0x00a8, # CYRILLIC SMALL LETTER I 0x0439: 0x00a9, # CYRILLIC SMALL LETTER SHORT I 0x043a: 0x00aa, # CYRILLIC SMALL LETTER KA 0x043b: 0x00ab, # CYRILLIC SMALL LETTER EL 0x043c: 0x00ac, # CYRILLIC SMALL LETTER EM 0x043d: 0x00ad, # CYRILLIC SMALL LETTER EN 0x043e: 0x00ae, # CYRILLIC SMALL LETTER O 0x043f: 0x00af, # CYRILLIC SMALL LETTER PE 0x0440: 0x00e0, # CYRILLIC SMALL LETTER ER 0x0441: 0x00e1, # CYRILLIC SMALL LETTER ES 0x0442: 0x00e2, # CYRILLIC SMALL LETTER TE 0x0443: 0x00e3, # CYRILLIC SMALL LETTER U 0x0444: 0x00e4, # CYRILLIC SMALL LETTER EF 0x0445: 0x00e5, # CYRILLIC SMALL LETTER HA 0x0446: 0x00e6, # CYRILLIC SMALL LETTER TSE 0x0447: 0x00e7, # CYRILLIC SMALL LETTER CHE 0x0448: 0x00e8, # CYRILLIC SMALL LETTER SHA 0x0449: 0x00e9, # CYRILLIC SMALL LETTER SHCHA 0x044a: 0x00ea, # CYRILLIC SMALL LETTER HARD SIGN 0x044b: 0x00eb, # CYRILLIC SMALL LETTER YERU 0x044c: 0x00ec, # CYRILLIC SMALL LETTER SOFT SIGN 0x044d: 0x00ed, # CYRILLIC SMALL LETTER E 0x044e: 0x00ee, # CYRILLIC SMALL LETTER YU 0x044f: 0x00ef, # CYRILLIC SMALL LETTER YA 0x0451: 0x00f1, # CYRILLIC SMALL LETTER IO 0x0454: 0x00f3, # CYRILLIC SMALL LETTER UKRAINIAN IE 0x0457: 0x00f5, # CYRILLIC SMALL LETTER YI 0x045e: 0x00f7, # CYRILLIC SMALL LETTER SHORT U 0x2116: 0x00fc, # NUMERO SIGN 0x2219: 0x00f9, # BULLET OPERATOR 0x221a: 0x00fb, # SQUARE ROOT 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL 0x2580: 0x00df, # UPPER HALF BLOCK 0x2584: 0x00dc, # LOWER HALF BLOCK 0x2588: 0x00db, # FULL BLOCK 0x258c: 0x00dd, # LEFT HALF BLOCK 0x2590: 0x00de, # RIGHT HALF BLOCK 0x2591: 0x00b0, # LIGHT SHADE 0x2592: 0x00b1, # MEDIUM SHADE 0x2593: 0x00b2, # DARK SHADE 0x25a0: 0x00fe, # BLACK SQUARE } cp863.py 0000644 00000103314 15053241622 0005763 0 ustar 00 """ Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP863.TXT' with gencodec.py. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.charmap_encode(input,self.errors,encoding_map)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.charmap_decode(input,self.errors,decoding_table)[0] class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='cp863', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX 0x0084: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE 0x0086: 0x00b6, # PILCROW SIGN 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX 0x008d: 0x2017, # DOUBLE LOW LINE 0x008e: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE 0x008f: 0x00a7, # SECTION SIGN 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE 0x0091: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE 0x0092: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX 0x0094: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS 0x0095: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE 0x0098: 0x00a4, # CURRENCY SIGN 0x0099: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS 0x009b: 0x00a2, # CENT SIGN 0x009c: 0x00a3, # POUND SIGN 0x009d: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE 0x009e: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK 0x00a0: 0x00a6, # BROKEN BAR 0x00a1: 0x00b4, # ACUTE ACCENT 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE 0x00a4: 0x00a8, # DIAERESIS 0x00a5: 0x00b8, # CEDILLA 0x00a6: 0x00b3, # SUPERSCRIPT THREE 0x00a7: 0x00af, # MACRON 0x00a8: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX 0x00a9: 0x2310, # REVERSED NOT SIGN 0x00aa: 0x00ac, # NOT SIGN 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER 0x00ad: 0x00be, # VULGAR FRACTION THREE QUARTERS 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00b0: 0x2591, # LIGHT SHADE 0x00b1: 0x2592, # MEDIUM SHADE 0x00b2: 0x2593, # DARK SHADE 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT 0x00db: 0x2588, # FULL BLOCK 0x00dc: 0x2584, # LOWER HALF BLOCK 0x00dd: 0x258c, # LEFT HALF BLOCK 0x00de: 0x2590, # RIGHT HALF BLOCK 0x00df: 0x2580, # UPPER HALF BLOCK 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA 0x00e3: 0x03c0, # GREEK SMALL LETTER PI 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA 0x00e6: 0x00b5, # MICRO SIGN 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA 0x00ec: 0x221e, # INFINITY 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON 0x00ef: 0x2229, # INTERSECTION 0x00f0: 0x2261, # IDENTICAL TO 0x00f1: 0x00b1, # PLUS-MINUS SIGN 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO 0x00f4: 0x2320, # TOP HALF INTEGRAL 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL 0x00f6: 0x00f7, # DIVISION SIGN 0x00f7: 0x2248, # ALMOST EQUAL TO 0x00f8: 0x00b0, # DEGREE SIGN 0x00f9: 0x2219, # BULLET OPERATOR 0x00fa: 0x00b7, # MIDDLE DOT 0x00fb: 0x221a, # SQUARE ROOT 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N 0x00fd: 0x00b2, # SUPERSCRIPT TWO 0x00fe: 0x25a0, # BLACK SQUARE 0x00ff: 0x00a0, # NO-BREAK SPACE }) ### Decoding Table decoding_table = ( u'\x00' # 0x0000 -> NULL u'\x01' # 0x0001 -> START OF HEADING u'\x02' # 0x0002 -> START OF TEXT u'\x03' # 0x0003 -> END OF TEXT u'\x04' # 0x0004 -> END OF TRANSMISSION u'\x05' # 0x0005 -> ENQUIRY u'\x06' # 0x0006 -> ACKNOWLEDGE u'\x07' # 0x0007 -> BELL u'\x08' # 0x0008 -> BACKSPACE u'\t' # 0x0009 -> HORIZONTAL TABULATION u'\n' # 0x000a -> LINE FEED u'\x0b' # 0x000b -> VERTICAL TABULATION u'\x0c' # 0x000c -> FORM FEED u'\r' # 0x000d -> CARRIAGE RETURN u'\x0e' # 0x000e -> SHIFT OUT u'\x0f' # 0x000f -> SHIFT IN u'\x10' # 0x0010 -> DATA LINK ESCAPE u'\x11' # 0x0011 -> DEVICE CONTROL ONE u'\x12' # 0x0012 -> DEVICE CONTROL TWO u'\x13' # 0x0013 -> DEVICE CONTROL THREE u'\x14' # 0x0014 -> DEVICE CONTROL FOUR u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE u'\x16' # 0x0016 -> SYNCHRONOUS IDLE u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK u'\x18' # 0x0018 -> CANCEL u'\x19' # 0x0019 -> END OF MEDIUM u'\x1a' # 0x001a -> SUBSTITUTE u'\x1b' # 0x001b -> ESCAPE u'\x1c' # 0x001c -> FILE SEPARATOR u'\x1d' # 0x001d -> GROUP SEPARATOR u'\x1e' # 0x001e -> RECORD SEPARATOR u'\x1f' # 0x001f -> UNIT SEPARATOR u' ' # 0x0020 -> SPACE u'!' # 0x0021 -> EXCLAMATION MARK u'"' # 0x0022 -> QUOTATION MARK u'#' # 0x0023 -> NUMBER SIGN u'$' # 0x0024 -> DOLLAR SIGN u'%' # 0x0025 -> PERCENT SIGN u'&' # 0x0026 -> AMPERSAND u"'" # 0x0027 -> APOSTROPHE u'(' # 0x0028 -> LEFT PARENTHESIS u')' # 0x0029 -> RIGHT PARENTHESIS u'*' # 0x002a -> ASTERISK u'+' # 0x002b -> PLUS SIGN u',' # 0x002c -> COMMA u'-' # 0x002d -> HYPHEN-MINUS u'.' # 0x002e -> FULL STOP u'/' # 0x002f -> SOLIDUS u'0' # 0x0030 -> DIGIT ZERO u'1' # 0x0031 -> DIGIT ONE u'2' # 0x0032 -> DIGIT TWO u'3' # 0x0033 -> DIGIT THREE u'4' # 0x0034 -> DIGIT FOUR u'5' # 0x0035 -> DIGIT FIVE u'6' # 0x0036 -> DIGIT SIX u'7' # 0x0037 -> DIGIT SEVEN u'8' # 0x0038 -> DIGIT EIGHT u'9' # 0x0039 -> DIGIT NINE u':' # 0x003a -> COLON u';' # 0x003b -> SEMICOLON u'<' # 0x003c -> LESS-THAN SIGN u'=' # 0x003d -> EQUALS SIGN u'>' # 0x003e -> GREATER-THAN SIGN u'?' # 0x003f -> QUESTION MARK u'@' # 0x0040 -> COMMERCIAL AT u'A' # 0x0041 -> LATIN CAPITAL LETTER A u'B' # 0x0042 -> LATIN CAPITAL LETTER B u'C' # 0x0043 -> LATIN CAPITAL LETTER C u'D' # 0x0044 -> LATIN CAPITAL LETTER D u'E' # 0x0045 -> LATIN CAPITAL LETTER E u'F' # 0x0046 -> LATIN CAPITAL LETTER F u'G' # 0x0047 -> LATIN CAPITAL LETTER G u'H' # 0x0048 -> LATIN CAPITAL LETTER H u'I' # 0x0049 -> LATIN CAPITAL LETTER I u'J' # 0x004a -> LATIN CAPITAL LETTER J u'K' # 0x004b -> LATIN CAPITAL LETTER K u'L' # 0x004c -> LATIN CAPITAL LETTER L u'M' # 0x004d -> LATIN CAPITAL LETTER M u'N' # 0x004e -> LATIN CAPITAL LETTER N u'O' # 0x004f -> LATIN CAPITAL LETTER O u'P' # 0x0050 -> LATIN CAPITAL LETTER P u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q u'R' # 0x0052 -> LATIN CAPITAL LETTER R u'S' # 0x0053 -> LATIN CAPITAL LETTER S u'T' # 0x0054 -> LATIN CAPITAL LETTER T u'U' # 0x0055 -> LATIN CAPITAL LETTER U u'V' # 0x0056 -> LATIN CAPITAL LETTER V u'W' # 0x0057 -> LATIN CAPITAL LETTER W u'X' # 0x0058 -> LATIN CAPITAL LETTER X u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y u'Z' # 0x005a -> LATIN CAPITAL LETTER Z u'[' # 0x005b -> LEFT SQUARE BRACKET u'\\' # 0x005c -> REVERSE SOLIDUS u']' # 0x005d -> RIGHT SQUARE BRACKET u'^' # 0x005e -> CIRCUMFLEX ACCENT u'_' # 0x005f -> LOW LINE u'`' # 0x0060 -> GRAVE ACCENT u'a' # 0x0061 -> LATIN SMALL LETTER A u'b' # 0x0062 -> LATIN SMALL LETTER B u'c' # 0x0063 -> LATIN SMALL LETTER C u'd' # 0x0064 -> LATIN SMALL LETTER D u'e' # 0x0065 -> LATIN SMALL LETTER E u'f' # 0x0066 -> LATIN SMALL LETTER F u'g' # 0x0067 -> LATIN SMALL LETTER G u'h' # 0x0068 -> LATIN SMALL LETTER H u'i' # 0x0069 -> LATIN SMALL LETTER I u'j' # 0x006a -> LATIN SMALL LETTER J u'k' # 0x006b -> LATIN SMALL LETTER K u'l' # 0x006c -> LATIN SMALL LETTER L u'm' # 0x006d -> LATIN SMALL LETTER M u'n' # 0x006e -> LATIN SMALL LETTER N u'o' # 0x006f -> LATIN SMALL LETTER O u'p' # 0x0070 -> LATIN SMALL LETTER P u'q' # 0x0071 -> LATIN SMALL LETTER Q u'r' # 0x0072 -> LATIN SMALL LETTER R u's' # 0x0073 -> LATIN SMALL LETTER S u't' # 0x0074 -> LATIN SMALL LETTER T u'u' # 0x0075 -> LATIN SMALL LETTER U u'v' # 0x0076 -> LATIN SMALL LETTER V u'w' # 0x0077 -> LATIN SMALL LETTER W u'x' # 0x0078 -> LATIN SMALL LETTER X u'y' # 0x0079 -> LATIN SMALL LETTER Y u'z' # 0x007a -> LATIN SMALL LETTER Z u'{' # 0x007b -> LEFT CURLY BRACKET u'|' # 0x007c -> VERTICAL LINE u'}' # 0x007d -> RIGHT CURLY BRACKET u'~' # 0x007e -> TILDE u'\x7f' # 0x007f -> DELETE u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX u'\xc2' # 0x0084 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE u'\xb6' # 0x0086 -> PILCROW SIGN u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX u'\u2017' # 0x008d -> DOUBLE LOW LINE u'\xc0' # 0x008e -> LATIN CAPITAL LETTER A WITH GRAVE u'\xa7' # 0x008f -> SECTION SIGN u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE u'\xc8' # 0x0091 -> LATIN CAPITAL LETTER E WITH GRAVE u'\xca' # 0x0092 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX u'\xcb' # 0x0094 -> LATIN CAPITAL LETTER E WITH DIAERESIS u'\xcf' # 0x0095 -> LATIN CAPITAL LETTER I WITH DIAERESIS u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE u'\xa4' # 0x0098 -> CURRENCY SIGN u'\xd4' # 0x0099 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS u'\xa2' # 0x009b -> CENT SIGN u'\xa3' # 0x009c -> POUND SIGN u'\xd9' # 0x009d -> LATIN CAPITAL LETTER U WITH GRAVE u'\xdb' # 0x009e -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK u'\xa6' # 0x00a0 -> BROKEN BAR u'\xb4' # 0x00a1 -> ACUTE ACCENT u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE u'\xa8' # 0x00a4 -> DIAERESIS u'\xb8' # 0x00a5 -> CEDILLA u'\xb3' # 0x00a6 -> SUPERSCRIPT THREE u'\xaf' # 0x00a7 -> MACRON u'\xce' # 0x00a8 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX u'\u2310' # 0x00a9 -> REVERSED NOT SIGN u'\xac' # 0x00aa -> NOT SIGN u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER u'\xbe' # 0x00ad -> VULGAR FRACTION THREE QUARTERS u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK u'\u2591' # 0x00b0 -> LIGHT SHADE u'\u2592' # 0x00b1 -> MEDIUM SHADE u'\u2593' # 0x00b2 -> DARK SHADE u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT u'\u2588' # 0x00db -> FULL BLOCK u'\u2584' # 0x00dc -> LOWER HALF BLOCK u'\u258c' # 0x00dd -> LEFT HALF BLOCK u'\u2590' # 0x00de -> RIGHT HALF BLOCK u'\u2580' # 0x00df -> UPPER HALF BLOCK u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA u'\xb5' # 0x00e6 -> MICRO SIGN u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA u'\u221e' # 0x00ec -> INFINITY u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON u'\u2229' # 0x00ef -> INTERSECTION u'\u2261' # 0x00f0 -> IDENTICAL TO u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL u'\xf7' # 0x00f6 -> DIVISION SIGN u'\u2248' # 0x00f7 -> ALMOST EQUAL TO u'\xb0' # 0x00f8 -> DEGREE SIGN u'\u2219' # 0x00f9 -> BULLET OPERATOR u'\xb7' # 0x00fa -> MIDDLE DOT u'\u221a' # 0x00fb -> SQUARE ROOT u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N u'\xb2' # 0x00fd -> SUPERSCRIPT TWO u'\u25a0' # 0x00fe -> BLACK SQUARE u'\xa0' # 0x00ff -> NO-BREAK SPACE ) ### Encoding Map encoding_map = { 0x0000: 0x0000, # NULL 0x0001: 0x0001, # START OF HEADING 0x0002: 0x0002, # START OF TEXT 0x0003: 0x0003, # END OF TEXT 0x0004: 0x0004, # END OF TRANSMISSION 0x0005: 0x0005, # ENQUIRY 0x0006: 0x0006, # ACKNOWLEDGE 0x0007: 0x0007, # BELL 0x0008: 0x0008, # BACKSPACE 0x0009: 0x0009, # HORIZONTAL TABULATION 0x000a: 0x000a, # LINE FEED 0x000b: 0x000b, # VERTICAL TABULATION 0x000c: 0x000c, # FORM FEED 0x000d: 0x000d, # CARRIAGE RETURN 0x000e: 0x000e, # SHIFT OUT 0x000f: 0x000f, # SHIFT IN 0x0010: 0x0010, # DATA LINK ESCAPE 0x0011: 0x0011, # DEVICE CONTROL ONE 0x0012: 0x0012, # DEVICE CONTROL TWO 0x0013: 0x0013, # DEVICE CONTROL THREE 0x0014: 0x0014, # DEVICE CONTROL FOUR 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE 0x0016: 0x0016, # SYNCHRONOUS IDLE 0x0017: 0x0017, # END OF TRANSMISSION BLOCK 0x0018: 0x0018, # CANCEL 0x0019: 0x0019, # END OF MEDIUM 0x001a: 0x001a, # SUBSTITUTE 0x001b: 0x001b, # ESCAPE 0x001c: 0x001c, # FILE SEPARATOR 0x001d: 0x001d, # GROUP SEPARATOR 0x001e: 0x001e, # RECORD SEPARATOR 0x001f: 0x001f, # UNIT SEPARATOR 0x0020: 0x0020, # SPACE 0x0021: 0x0021, # EXCLAMATION MARK 0x0022: 0x0022, # QUOTATION MARK 0x0023: 0x0023, # NUMBER SIGN 0x0024: 0x0024, # DOLLAR SIGN 0x0025: 0x0025, # PERCENT SIGN 0x0026: 0x0026, # AMPERSAND 0x0027: 0x0027, # APOSTROPHE 0x0028: 0x0028, # LEFT PARENTHESIS 0x0029: 0x0029, # RIGHT PARENTHESIS 0x002a: 0x002a, # ASTERISK 0x002b: 0x002b, # PLUS SIGN 0x002c: 0x002c, # COMMA 0x002d: 0x002d, # HYPHEN-MINUS 0x002e: 0x002e, # FULL STOP 0x002f: 0x002f, # SOLIDUS 0x0030: 0x0030, # DIGIT ZERO 0x0031: 0x0031, # DIGIT ONE 0x0032: 0x0032, # DIGIT TWO 0x0033: 0x0033, # DIGIT THREE 0x0034: 0x0034, # DIGIT FOUR 0x0035: 0x0035, # DIGIT FIVE 0x0036: 0x0036, # DIGIT SIX 0x0037: 0x0037, # DIGIT SEVEN 0x0038: 0x0038, # DIGIT EIGHT 0x0039: 0x0039, # DIGIT NINE 0x003a: 0x003a, # COLON 0x003b: 0x003b, # SEMICOLON 0x003c: 0x003c, # LESS-THAN SIGN 0x003d: 0x003d, # EQUALS SIGN 0x003e: 0x003e, # GREATER-THAN SIGN 0x003f: 0x003f, # QUESTION MARK 0x0040: 0x0040, # COMMERCIAL AT 0x0041: 0x0041, # LATIN CAPITAL LETTER A 0x0042: 0x0042, # LATIN CAPITAL LETTER B 0x0043: 0x0043, # LATIN CAPITAL LETTER C 0x0044: 0x0044, # LATIN CAPITAL LETTER D 0x0045: 0x0045, # LATIN CAPITAL LETTER E 0x0046: 0x0046, # LATIN CAPITAL LETTER F 0x0047: 0x0047, # LATIN CAPITAL LETTER G 0x0048: 0x0048, # LATIN CAPITAL LETTER H 0x0049: 0x0049, # LATIN CAPITAL LETTER I 0x004a: 0x004a, # LATIN CAPITAL LETTER J 0x004b: 0x004b, # LATIN CAPITAL LETTER K 0x004c: 0x004c, # LATIN CAPITAL LETTER L 0x004d: 0x004d, # LATIN CAPITAL LETTER M 0x004e: 0x004e, # LATIN CAPITAL LETTER N 0x004f: 0x004f, # LATIN CAPITAL LETTER O 0x0050: 0x0050, # LATIN CAPITAL LETTER P 0x0051: 0x0051, # LATIN CAPITAL LETTER Q 0x0052: 0x0052, # LATIN CAPITAL LETTER R 0x0053: 0x0053, # LATIN CAPITAL LETTER S 0x0054: 0x0054, # LATIN CAPITAL LETTER T 0x0055: 0x0055, # LATIN CAPITAL LETTER U 0x0056: 0x0056, # LATIN CAPITAL LETTER V 0x0057: 0x0057, # LATIN CAPITAL LETTER W 0x0058: 0x0058, # LATIN CAPITAL LETTER X 0x0059: 0x0059, # LATIN CAPITAL LETTER Y 0x005a: 0x005a, # LATIN CAPITAL LETTER Z 0x005b: 0x005b, # LEFT SQUARE BRACKET 0x005c: 0x005c, # REVERSE SOLIDUS 0x005d: 0x005d, # RIGHT SQUARE BRACKET 0x005e: 0x005e, # CIRCUMFLEX ACCENT 0x005f: 0x005f, # LOW LINE 0x0060: 0x0060, # GRAVE ACCENT 0x0061: 0x0061, # LATIN SMALL LETTER A 0x0062: 0x0062, # LATIN SMALL LETTER B 0x0063: 0x0063, # LATIN SMALL LETTER C 0x0064: 0x0064, # LATIN SMALL LETTER D 0x0065: 0x0065, # LATIN SMALL LETTER E 0x0066: 0x0066, # LATIN SMALL LETTER F 0x0067: 0x0067, # LATIN SMALL LETTER G 0x0068: 0x0068, # LATIN SMALL LETTER H 0x0069: 0x0069, # LATIN SMALL LETTER I 0x006a: 0x006a, # LATIN SMALL LETTER J 0x006b: 0x006b, # LATIN SMALL LETTER K 0x006c: 0x006c, # LATIN SMALL LETTER L 0x006d: 0x006d, # LATIN SMALL LETTER M 0x006e: 0x006e, # LATIN SMALL LETTER N 0x006f: 0x006f, # LATIN SMALL LETTER O 0x0070: 0x0070, # LATIN SMALL LETTER P 0x0071: 0x0071, # LATIN SMALL LETTER Q 0x0072: 0x0072, # LATIN SMALL LETTER R 0x0073: 0x0073, # LATIN SMALL LETTER S 0x0074: 0x0074, # LATIN SMALL LETTER T 0x0075: 0x0075, # LATIN SMALL LETTER U 0x0076: 0x0076, # LATIN SMALL LETTER V 0x0077: 0x0077, # LATIN SMALL LETTER W 0x0078: 0x0078, # LATIN SMALL LETTER X 0x0079: 0x0079, # LATIN SMALL LETTER Y 0x007a: 0x007a, # LATIN SMALL LETTER Z 0x007b: 0x007b, # LEFT CURLY BRACKET 0x007c: 0x007c, # VERTICAL LINE 0x007d: 0x007d, # RIGHT CURLY BRACKET 0x007e: 0x007e, # TILDE 0x007f: 0x007f, # DELETE 0x00a0: 0x00ff, # NO-BREAK SPACE 0x00a2: 0x009b, # CENT SIGN 0x00a3: 0x009c, # POUND SIGN 0x00a4: 0x0098, # CURRENCY SIGN 0x00a6: 0x00a0, # BROKEN BAR 0x00a7: 0x008f, # SECTION SIGN 0x00a8: 0x00a4, # DIAERESIS 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00ac: 0x00aa, # NOT SIGN 0x00af: 0x00a7, # MACRON 0x00b0: 0x00f8, # DEGREE SIGN 0x00b1: 0x00f1, # PLUS-MINUS SIGN 0x00b2: 0x00fd, # SUPERSCRIPT TWO 0x00b3: 0x00a6, # SUPERSCRIPT THREE 0x00b4: 0x00a1, # ACUTE ACCENT 0x00b5: 0x00e6, # MICRO SIGN 0x00b6: 0x0086, # PILCROW SIGN 0x00b7: 0x00fa, # MIDDLE DOT 0x00b8: 0x00a5, # CEDILLA 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF 0x00be: 0x00ad, # VULGAR FRACTION THREE QUARTERS 0x00c0: 0x008e, # LATIN CAPITAL LETTER A WITH GRAVE 0x00c2: 0x0084, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA 0x00c8: 0x0091, # LATIN CAPITAL LETTER E WITH GRAVE 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE 0x00ca: 0x0092, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX 0x00cb: 0x0094, # LATIN CAPITAL LETTER E WITH DIAERESIS 0x00ce: 0x00a8, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX 0x00cf: 0x0095, # LATIN CAPITAL LETTER I WITH DIAERESIS 0x00d4: 0x0099, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX 0x00d9: 0x009d, # LATIN CAPITAL LETTER U WITH GRAVE 0x00db: 0x009e, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX 0x00f7: 0x00f6, # DIVISION SIGN 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON 0x03c0: 0x00e3, # GREEK SMALL LETTER PI 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI 0x2017: 0x008d, # DOUBLE LOW LINE 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N 0x2219: 0x00f9, # BULLET OPERATOR 0x221a: 0x00fb, # SQUARE ROOT 0x221e: 0x00ec, # INFINITY 0x2229: 0x00ef, # INTERSECTION 0x2248: 0x00f7, # ALMOST EQUAL TO 0x2261: 0x00f0, # IDENTICAL TO 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO 0x2310: 0x00a9, # REVERSED NOT SIGN 0x2320: 0x00f4, # TOP HALF INTEGRAL 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL 0x2580: 0x00df, # UPPER HALF BLOCK 0x2584: 0x00dc, # LOWER HALF BLOCK 0x2588: 0x00db, # FULL BLOCK 0x258c: 0x00dd, # LEFT HALF BLOCK 0x2590: 0x00de, # RIGHT HALF BLOCK 0x2591: 0x00b0, # LIGHT SHADE 0x2592: 0x00b1, # MEDIUM SHADE 0x2593: 0x00b2, # DARK SHADE 0x25a0: 0x00fe, # BLACK SQUARE } cp424.py 0000644 00000030027 15053241622 0005754 0 ustar 00 """ Python Character Mapping Codec cp424 generated from 'MAPPINGS/VENDORS/MISC/CP424.TXT' with gencodec.py. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.charmap_decode(input,self.errors,decoding_table)[0] class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='cp424', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) ### Decoding Table decoding_table = ( u'\x00' # 0x00 -> NULL u'\x01' # 0x01 -> START OF HEADING u'\x02' # 0x02 -> START OF TEXT u'\x03' # 0x03 -> END OF TEXT u'\x9c' # 0x04 -> SELECT u'\t' # 0x05 -> HORIZONTAL TABULATION u'\x86' # 0x06 -> REQUIRED NEW LINE u'\x7f' # 0x07 -> DELETE u'\x97' # 0x08 -> GRAPHIC ESCAPE u'\x8d' # 0x09 -> SUPERSCRIPT u'\x8e' # 0x0A -> REPEAT u'\x0b' # 0x0B -> VERTICAL TABULATION u'\x0c' # 0x0C -> FORM FEED u'\r' # 0x0D -> CARRIAGE RETURN u'\x0e' # 0x0E -> SHIFT OUT u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO u'\x13' # 0x13 -> DEVICE CONTROL THREE u'\x9d' # 0x14 -> RESTORE/ENABLE PRESENTATION u'\x85' # 0x15 -> NEW LINE u'\x08' # 0x16 -> BACKSPACE u'\x87' # 0x17 -> PROGRAM OPERATOR COMMUNICATION u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM u'\x92' # 0x1A -> UNIT BACK SPACE u'\x8f' # 0x1B -> CUSTOMER USE ONE u'\x1c' # 0x1C -> FILE SEPARATOR u'\x1d' # 0x1D -> GROUP SEPARATOR u'\x1e' # 0x1E -> RECORD SEPARATOR u'\x1f' # 0x1F -> UNIT SEPARATOR u'\x80' # 0x20 -> DIGIT SELECT u'\x81' # 0x21 -> START OF SIGNIFICANCE u'\x82' # 0x22 -> FIELD SEPARATOR u'\x83' # 0x23 -> WORD UNDERSCORE u'\x84' # 0x24 -> BYPASS OR INHIBIT PRESENTATION u'\n' # 0x25 -> LINE FEED u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK u'\x1b' # 0x27 -> ESCAPE u'\x88' # 0x28 -> SET ATTRIBUTE u'\x89' # 0x29 -> START FIELD EXTENDED u'\x8a' # 0x2A -> SET MODE OR SWITCH u'\x8b' # 0x2B -> CONTROL SEQUENCE PREFIX u'\x8c' # 0x2C -> MODIFY FIELD ATTRIBUTE u'\x05' # 0x2D -> ENQUIRY u'\x06' # 0x2E -> ACKNOWLEDGE u'\x07' # 0x2F -> BELL u'\x90' # 0x30 -> <reserved> u'\x91' # 0x31 -> <reserved> u'\x16' # 0x32 -> SYNCHRONOUS IDLE u'\x93' # 0x33 -> INDEX RETURN u'\x94' # 0x34 -> PRESENTATION POSITION u'\x95' # 0x35 -> TRANSPARENT u'\x96' # 0x36 -> NUMERIC BACKSPACE u'\x04' # 0x37 -> END OF TRANSMISSION u'\x98' # 0x38 -> SUBSCRIPT u'\x99' # 0x39 -> INDENT TABULATION u'\x9a' # 0x3A -> REVERSE FORM FEED u'\x9b' # 0x3B -> CUSTOMER USE THREE u'\x14' # 0x3C -> DEVICE CONTROL FOUR u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE u'\x9e' # 0x3E -> <reserved> u'\x1a' # 0x3F -> SUBSTITUTE u' ' # 0x40 -> SPACE u'\u05d0' # 0x41 -> HEBREW LETTER ALEF u'\u05d1' # 0x42 -> HEBREW LETTER BET u'\u05d2' # 0x43 -> HEBREW LETTER GIMEL u'\u05d3' # 0x44 -> HEBREW LETTER DALET u'\u05d4' # 0x45 -> HEBREW LETTER HE u'\u05d5' # 0x46 -> HEBREW LETTER VAV u'\u05d6' # 0x47 -> HEBREW LETTER ZAYIN u'\u05d7' # 0x48 -> HEBREW LETTER HET u'\u05d8' # 0x49 -> HEBREW LETTER TET u'\xa2' # 0x4A -> CENT SIGN u'.' # 0x4B -> FULL STOP u'<' # 0x4C -> LESS-THAN SIGN u'(' # 0x4D -> LEFT PARENTHESIS u'+' # 0x4E -> PLUS SIGN u'|' # 0x4F -> VERTICAL LINE u'&' # 0x50 -> AMPERSAND u'\u05d9' # 0x51 -> HEBREW LETTER YOD u'\u05da' # 0x52 -> HEBREW LETTER FINAL KAF u'\u05db' # 0x53 -> HEBREW LETTER KAF u'\u05dc' # 0x54 -> HEBREW LETTER LAMED u'\u05dd' # 0x55 -> HEBREW LETTER FINAL MEM u'\u05de' # 0x56 -> HEBREW LETTER MEM u'\u05df' # 0x57 -> HEBREW LETTER FINAL NUN u'\u05e0' # 0x58 -> HEBREW LETTER NUN u'\u05e1' # 0x59 -> HEBREW LETTER SAMEKH u'!' # 0x5A -> EXCLAMATION MARK u'$' # 0x5B -> DOLLAR SIGN u'*' # 0x5C -> ASTERISK u')' # 0x5D -> RIGHT PARENTHESIS u';' # 0x5E -> SEMICOLON u'\xac' # 0x5F -> NOT SIGN u'-' # 0x60 -> HYPHEN-MINUS u'/' # 0x61 -> SOLIDUS u'\u05e2' # 0x62 -> HEBREW LETTER AYIN u'\u05e3' # 0x63 -> HEBREW LETTER FINAL PE u'\u05e4' # 0x64 -> HEBREW LETTER PE u'\u05e5' # 0x65 -> HEBREW LETTER FINAL TSADI u'\u05e6' # 0x66 -> HEBREW LETTER TSADI u'\u05e7' # 0x67 -> HEBREW LETTER QOF u'\u05e8' # 0x68 -> HEBREW LETTER RESH u'\u05e9' # 0x69 -> HEBREW LETTER SHIN u'\xa6' # 0x6A -> BROKEN BAR u',' # 0x6B -> COMMA u'%' # 0x6C -> PERCENT SIGN u'_' # 0x6D -> LOW LINE u'>' # 0x6E -> GREATER-THAN SIGN u'?' # 0x6F -> QUESTION MARK u'\ufffe' # 0x70 -> UNDEFINED u'\u05ea' # 0x71 -> HEBREW LETTER TAV u'\ufffe' # 0x72 -> UNDEFINED u'\ufffe' # 0x73 -> UNDEFINED u'\xa0' # 0x74 -> NO-BREAK SPACE u'\ufffe' # 0x75 -> UNDEFINED u'\ufffe' # 0x76 -> UNDEFINED u'\ufffe' # 0x77 -> UNDEFINED u'\u2017' # 0x78 -> DOUBLE LOW LINE u'`' # 0x79 -> GRAVE ACCENT u':' # 0x7A -> COLON u'#' # 0x7B -> NUMBER SIGN u'@' # 0x7C -> COMMERCIAL AT u"'" # 0x7D -> APOSTROPHE u'=' # 0x7E -> EQUALS SIGN u'"' # 0x7F -> QUOTATION MARK u'\ufffe' # 0x80 -> UNDEFINED u'a' # 0x81 -> LATIN SMALL LETTER A u'b' # 0x82 -> LATIN SMALL LETTER B u'c' # 0x83 -> LATIN SMALL LETTER C u'd' # 0x84 -> LATIN SMALL LETTER D u'e' # 0x85 -> LATIN SMALL LETTER E u'f' # 0x86 -> LATIN SMALL LETTER F u'g' # 0x87 -> LATIN SMALL LETTER G u'h' # 0x88 -> LATIN SMALL LETTER H u'i' # 0x89 -> LATIN SMALL LETTER I u'\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK u'\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK u'\ufffe' # 0x8C -> UNDEFINED u'\ufffe' # 0x8D -> UNDEFINED u'\ufffe' # 0x8E -> UNDEFINED u'\xb1' # 0x8F -> PLUS-MINUS SIGN u'\xb0' # 0x90 -> DEGREE SIGN u'j' # 0x91 -> LATIN SMALL LETTER J u'k' # 0x92 -> LATIN SMALL LETTER K u'l' # 0x93 -> LATIN SMALL LETTER L u'm' # 0x94 -> LATIN SMALL LETTER M u'n' # 0x95 -> LATIN SMALL LETTER N u'o' # 0x96 -> LATIN SMALL LETTER O u'p' # 0x97 -> LATIN SMALL LETTER P u'q' # 0x98 -> LATIN SMALL LETTER Q u'r' # 0x99 -> LATIN SMALL LETTER R u'\ufffe' # 0x9A -> UNDEFINED u'\ufffe' # 0x9B -> UNDEFINED u'\ufffe' # 0x9C -> UNDEFINED u'\xb8' # 0x9D -> CEDILLA u'\ufffe' # 0x9E -> UNDEFINED u'\xa4' # 0x9F -> CURRENCY SIGN u'\xb5' # 0xA0 -> MICRO SIGN u'~' # 0xA1 -> TILDE u's' # 0xA2 -> LATIN SMALL LETTER S u't' # 0xA3 -> LATIN SMALL LETTER T u'u' # 0xA4 -> LATIN SMALL LETTER U u'v' # 0xA5 -> LATIN SMALL LETTER V u'w' # 0xA6 -> LATIN SMALL LETTER W u'x' # 0xA7 -> LATIN SMALL LETTER X u'y' # 0xA8 -> LATIN SMALL LETTER Y u'z' # 0xA9 -> LATIN SMALL LETTER Z u'\ufffe' # 0xAA -> UNDEFINED u'\ufffe' # 0xAB -> UNDEFINED u'\ufffe' # 0xAC -> UNDEFINED u'\ufffe' # 0xAD -> UNDEFINED u'\ufffe' # 0xAE -> UNDEFINED u'\xae' # 0xAF -> REGISTERED SIGN u'^' # 0xB0 -> CIRCUMFLEX ACCENT u'\xa3' # 0xB1 -> POUND SIGN u'\xa5' # 0xB2 -> YEN SIGN u'\xb7' # 0xB3 -> MIDDLE DOT u'\xa9' # 0xB4 -> COPYRIGHT SIGN u'\xa7' # 0xB5 -> SECTION SIGN u'\xb6' # 0xB6 -> PILCROW SIGN u'\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER u'\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF u'\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS u'[' # 0xBA -> LEFT SQUARE BRACKET u']' # 0xBB -> RIGHT SQUARE BRACKET u'\xaf' # 0xBC -> MACRON u'\xa8' # 0xBD -> DIAERESIS u'\xb4' # 0xBE -> ACUTE ACCENT u'\xd7' # 0xBF -> MULTIPLICATION SIGN u'{' # 0xC0 -> LEFT CURLY BRACKET u'A' # 0xC1 -> LATIN CAPITAL LETTER A u'B' # 0xC2 -> LATIN CAPITAL LETTER B u'C' # 0xC3 -> LATIN CAPITAL LETTER C u'D' # 0xC4 -> LATIN CAPITAL LETTER D u'E' # 0xC5 -> LATIN CAPITAL LETTER E u'F' # 0xC6 -> LATIN CAPITAL LETTER F u'G' # 0xC7 -> LATIN CAPITAL LETTER G u'H' # 0xC8 -> LATIN CAPITAL LETTER H u'I' # 0xC9 -> LATIN CAPITAL LETTER I u'\xad' # 0xCA -> SOFT HYPHEN u'\ufffe' # 0xCB -> UNDEFINED u'\ufffe' # 0xCC -> UNDEFINED u'\ufffe' # 0xCD -> UNDEFINED u'\ufffe' # 0xCE -> UNDEFINED u'\ufffe' # 0xCF -> UNDEFINED u'}' # 0xD0 -> RIGHT CURLY BRACKET u'J' # 0xD1 -> LATIN CAPITAL LETTER J u'K' # 0xD2 -> LATIN CAPITAL LETTER K u'L' # 0xD3 -> LATIN CAPITAL LETTER L u'M' # 0xD4 -> LATIN CAPITAL LETTER M u'N' # 0xD5 -> LATIN CAPITAL LETTER N u'O' # 0xD6 -> LATIN CAPITAL LETTER O u'P' # 0xD7 -> LATIN CAPITAL LETTER P u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q u'R' # 0xD9 -> LATIN CAPITAL LETTER R u'\xb9' # 0xDA -> SUPERSCRIPT ONE u'\ufffe' # 0xDB -> UNDEFINED u'\ufffe' # 0xDC -> UNDEFINED u'\ufffe' # 0xDD -> UNDEFINED u'\ufffe' # 0xDE -> UNDEFINED u'\ufffe' # 0xDF -> UNDEFINED u'\\' # 0xE0 -> REVERSE SOLIDUS u'\xf7' # 0xE1 -> DIVISION SIGN u'S' # 0xE2 -> LATIN CAPITAL LETTER S u'T' # 0xE3 -> LATIN CAPITAL LETTER T u'U' # 0xE4 -> LATIN CAPITAL LETTER U u'V' # 0xE5 -> LATIN CAPITAL LETTER V u'W' # 0xE6 -> LATIN CAPITAL LETTER W u'X' # 0xE7 -> LATIN CAPITAL LETTER X u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z u'\xb2' # 0xEA -> SUPERSCRIPT TWO u'\ufffe' # 0xEB -> UNDEFINED u'\ufffe' # 0xEC -> UNDEFINED u'\ufffe' # 0xED -> UNDEFINED u'\ufffe' # 0xEE -> UNDEFINED u'\ufffe' # 0xEF -> UNDEFINED u'0' # 0xF0 -> DIGIT ZERO u'1' # 0xF1 -> DIGIT ONE u'2' # 0xF2 -> DIGIT TWO u'3' # 0xF3 -> DIGIT THREE u'4' # 0xF4 -> DIGIT FOUR u'5' # 0xF5 -> DIGIT FIVE u'6' # 0xF6 -> DIGIT SIX u'7' # 0xF7 -> DIGIT SEVEN u'8' # 0xF8 -> DIGIT EIGHT u'9' # 0xF9 -> DIGIT NINE u'\xb3' # 0xFA -> SUPERSCRIPT THREE u'\ufffe' # 0xFB -> UNDEFINED u'\ufffe' # 0xFC -> UNDEFINED u'\ufffe' # 0xFD -> UNDEFINED u'\ufffe' # 0xFE -> UNDEFINED u'\x9f' # 0xFF -> EIGHT ONES ) ### Encoding table encoding_table=codecs.charmap_build(decoding_table) big5.py 0000644 00000001773 15053241622 0005754 0 ustar 00 # # big5.py: Python Unicode Codec for BIG5 # # Written by Hye-Shik Chang <perky@FreeBSD.org> # import _codecs_tw, codecs import _multibytecodec as mbc codec = _codecs_tw.getcodec('big5') class Codec(codecs.Codec): encode = codec.encode decode = codec.decode class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, codecs.IncrementalEncoder): codec = codec class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, codecs.IncrementalDecoder): codec = codec class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): codec = codec class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): codec = codec def getregentry(): return codecs.CodecInfo( name='big5', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) iso8859_3.py 0000644 00000032041 15053241622 0006470 0 ustar 00 """ Python Character Mapping Codec iso8859_3 generated from 'MAPPINGS/ISO8859/8859-3.TXT' with gencodec.py. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.charmap_decode(input,self.errors,decoding_table)[0] class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='iso8859-3', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) ### Decoding Table decoding_table = ( u'\x00' # 0x00 -> NULL u'\x01' # 0x01 -> START OF HEADING u'\x02' # 0x02 -> START OF TEXT u'\x03' # 0x03 -> END OF TEXT u'\x04' # 0x04 -> END OF TRANSMISSION u'\x05' # 0x05 -> ENQUIRY u'\x06' # 0x06 -> ACKNOWLEDGE u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION u'\n' # 0x0A -> LINE FEED u'\x0b' # 0x0B -> VERTICAL TABULATION u'\x0c' # 0x0C -> FORM FEED u'\r' # 0x0D -> CARRIAGE RETURN u'\x0e' # 0x0E -> SHIFT OUT u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO u'\x13' # 0x13 -> DEVICE CONTROL THREE u'\x14' # 0x14 -> DEVICE CONTROL FOUR u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE u'\x16' # 0x16 -> SYNCHRONOUS IDLE u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM u'\x1a' # 0x1A -> SUBSTITUTE u'\x1b' # 0x1B -> ESCAPE u'\x1c' # 0x1C -> FILE SEPARATOR u'\x1d' # 0x1D -> GROUP SEPARATOR u'\x1e' # 0x1E -> RECORD SEPARATOR u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK u'#' # 0x23 -> NUMBER SIGN u'$' # 0x24 -> DOLLAR SIGN u'%' # 0x25 -> PERCENT SIGN u'&' # 0x26 -> AMPERSAND u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS u'*' # 0x2A -> ASTERISK u'+' # 0x2B -> PLUS SIGN u',' # 0x2C -> COMMA u'-' # 0x2D -> HYPHEN-MINUS u'.' # 0x2E -> FULL STOP u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO u'3' # 0x33 -> DIGIT THREE u'4' # 0x34 -> DIGIT FOUR u'5' # 0x35 -> DIGIT FIVE u'6' # 0x36 -> DIGIT SIX u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE u':' # 0x3A -> COLON u';' # 0x3B -> SEMICOLON u'<' # 0x3C -> LESS-THAN SIGN u'=' # 0x3D -> EQUALS SIGN u'>' # 0x3E -> GREATER-THAN SIGN u'?' # 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B u'C' # 0x43 -> LATIN CAPITAL LETTER C u'D' # 0x44 -> LATIN CAPITAL LETTER D u'E' # 0x45 -> LATIN CAPITAL LETTER E u'F' # 0x46 -> LATIN CAPITAL LETTER F u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I u'J' # 0x4A -> LATIN CAPITAL LETTER J u'K' # 0x4B -> LATIN CAPITAL LETTER K u'L' # 0x4C -> LATIN CAPITAL LETTER L u'M' # 0x4D -> LATIN CAPITAL LETTER M u'N' # 0x4E -> LATIN CAPITAL LETTER N u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R u'S' # 0x53 -> LATIN CAPITAL LETTER S u'T' # 0x54 -> LATIN CAPITAL LETTER T u'U' # 0x55 -> LATIN CAPITAL LETTER U u'V' # 0x56 -> LATIN CAPITAL LETTER V u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y u'Z' # 0x5A -> LATIN CAPITAL LETTER Z u'[' # 0x5B -> LEFT SQUARE BRACKET u'\\' # 0x5C -> REVERSE SOLIDUS u']' # 0x5D -> RIGHT SQUARE BRACKET u'^' # 0x5E -> CIRCUMFLEX ACCENT u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B u'c' # 0x63 -> LATIN SMALL LETTER C u'd' # 0x64 -> LATIN SMALL LETTER D u'e' # 0x65 -> LATIN SMALL LETTER E u'f' # 0x66 -> LATIN SMALL LETTER F u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I u'j' # 0x6A -> LATIN SMALL LETTER J u'k' # 0x6B -> LATIN SMALL LETTER K u'l' # 0x6C -> LATIN SMALL LETTER L u'm' # 0x6D -> LATIN SMALL LETTER M u'n' # 0x6E -> LATIN SMALL LETTER N u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R u's' # 0x73 -> LATIN SMALL LETTER S u't' # 0x74 -> LATIN SMALL LETTER T u'u' # 0x75 -> LATIN SMALL LETTER U u'v' # 0x76 -> LATIN SMALL LETTER V u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y u'z' # 0x7A -> LATIN SMALL LETTER Z u'{' # 0x7B -> LEFT CURLY BRACKET u'|' # 0x7C -> VERTICAL LINE u'}' # 0x7D -> RIGHT CURLY BRACKET u'~' # 0x7E -> TILDE u'\x7f' # 0x7F -> DELETE u'\x80' # 0x80 -> <control> u'\x81' # 0x81 -> <control> u'\x82' # 0x82 -> <control> u'\x83' # 0x83 -> <control> u'\x84' # 0x84 -> <control> u'\x85' # 0x85 -> <control> u'\x86' # 0x86 -> <control> u'\x87' # 0x87 -> <control> u'\x88' # 0x88 -> <control> u'\x89' # 0x89 -> <control> u'\x8a' # 0x8A -> <control> u'\x8b' # 0x8B -> <control> u'\x8c' # 0x8C -> <control> u'\x8d' # 0x8D -> <control> u'\x8e' # 0x8E -> <control> u'\x8f' # 0x8F -> <control> u'\x90' # 0x90 -> <control> u'\x91' # 0x91 -> <control> u'\x92' # 0x92 -> <control> u'\x93' # 0x93 -> <control> u'\x94' # 0x94 -> <control> u'\x95' # 0x95 -> <control> u'\x96' # 0x96 -> <control> u'\x97' # 0x97 -> <control> u'\x98' # 0x98 -> <control> u'\x99' # 0x99 -> <control> u'\x9a' # 0x9A -> <control> u'\x9b' # 0x9B -> <control> u'\x9c' # 0x9C -> <control> u'\x9d' # 0x9D -> <control> u'\x9e' # 0x9E -> <control> u'\x9f' # 0x9F -> <control> u'\xa0' # 0xA0 -> NO-BREAK SPACE u'\u0126' # 0xA1 -> LATIN CAPITAL LETTER H WITH STROKE u'\u02d8' # 0xA2 -> BREVE u'\xa3' # 0xA3 -> POUND SIGN u'\xa4' # 0xA4 -> CURRENCY SIGN u'\ufffe' u'\u0124' # 0xA6 -> LATIN CAPITAL LETTER H WITH CIRCUMFLEX u'\xa7' # 0xA7 -> SECTION SIGN u'\xa8' # 0xA8 -> DIAERESIS u'\u0130' # 0xA9 -> LATIN CAPITAL LETTER I WITH DOT ABOVE u'\u015e' # 0xAA -> LATIN CAPITAL LETTER S WITH CEDILLA u'\u011e' # 0xAB -> LATIN CAPITAL LETTER G WITH BREVE u'\u0134' # 0xAC -> LATIN CAPITAL LETTER J WITH CIRCUMFLEX u'\xad' # 0xAD -> SOFT HYPHEN u'\ufffe' u'\u017b' # 0xAF -> LATIN CAPITAL LETTER Z WITH DOT ABOVE u'\xb0' # 0xB0 -> DEGREE SIGN u'\u0127' # 0xB1 -> LATIN SMALL LETTER H WITH STROKE u'\xb2' # 0xB2 -> SUPERSCRIPT TWO u'\xb3' # 0xB3 -> SUPERSCRIPT THREE u'\xb4' # 0xB4 -> ACUTE ACCENT u'\xb5' # 0xB5 -> MICRO SIGN u'\u0125' # 0xB6 -> LATIN SMALL LETTER H WITH CIRCUMFLEX u'\xb7' # 0xB7 -> MIDDLE DOT u'\xb8' # 0xB8 -> CEDILLA u'\u0131' # 0xB9 -> LATIN SMALL LETTER DOTLESS I u'\u015f' # 0xBA -> LATIN SMALL LETTER S WITH CEDILLA u'\u011f' # 0xBB -> LATIN SMALL LETTER G WITH BREVE u'\u0135' # 0xBC -> LATIN SMALL LETTER J WITH CIRCUMFLEX u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF u'\ufffe' u'\u017c' # 0xBF -> LATIN SMALL LETTER Z WITH DOT ABOVE u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX u'\ufffe' u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS u'\u010a' # 0xC5 -> LATIN CAPITAL LETTER C WITH DOT ABOVE u'\u0108' # 0xC6 -> LATIN CAPITAL LETTER C WITH CIRCUMFLEX u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS u'\ufffe' u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX u'\u0120' # 0xD5 -> LATIN CAPITAL LETTER G WITH DOT ABOVE u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS u'\xd7' # 0xD7 -> MULTIPLICATION SIGN u'\u011c' # 0xD8 -> LATIN CAPITAL LETTER G WITH CIRCUMFLEX u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS u'\u016c' # 0xDD -> LATIN CAPITAL LETTER U WITH BREVE u'\u015c' # 0xDE -> LATIN CAPITAL LETTER S WITH CIRCUMFLEX u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX u'\ufffe' u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS u'\u010b' # 0xE5 -> LATIN SMALL LETTER C WITH DOT ABOVE u'\u0109' # 0xE6 -> LATIN SMALL LETTER C WITH CIRCUMFLEX u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS u'\ufffe' u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX u'\u0121' # 0xF5 -> LATIN SMALL LETTER G WITH DOT ABOVE u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS u'\xf7' # 0xF7 -> DIVISION SIGN u'\u011d' # 0xF8 -> LATIN SMALL LETTER G WITH CIRCUMFLEX u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS u'\u016d' # 0xFD -> LATIN SMALL LETTER U WITH BREVE u'\u015d' # 0xFE -> LATIN SMALL LETTER S WITH CIRCUMFLEX u'\u02d9' # 0xFF -> DOT ABOVE ) ### Encoding table encoding_table=codecs.charmap_build(decoding_table) utf_16_le.py 0000644 00000002015 15053241622 0006700 0 ustar 00 """ Python 'utf-16-le' Codec Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. """ import codecs ### Codec APIs encode = codecs.utf_16_le_encode def decode(input, errors='strict'): return codecs.utf_16_le_decode(input, errors, True) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.utf_16_le_encode(input, self.errors)[0] class IncrementalDecoder(codecs.BufferedIncrementalDecoder): _buffer_decode = codecs.utf_16_le_decode class StreamWriter(codecs.StreamWriter): encode = codecs.utf_16_le_encode class StreamReader(codecs.StreamReader): decode = codecs.utf_16_le_decode ### encodings module API def getregentry(): return codecs.CodecInfo( name='utf-16-le', encode=encode, decode=decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) cp273.py 0000644 00000033464 15053241622 0005766 0 ustar 00 """ Python Character Mapping Codec cp273 generated from 'python-mappings/CP273.TXT' with gencodec.py. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.charmap_decode(input,self.errors,decoding_table)[0] class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='cp273', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) ### Decoding Table decoding_table = ( '\x00' # 0x00 -> NULL (NUL) '\x01' # 0x01 -> START OF HEADING (SOH) '\x02' # 0x02 -> START OF TEXT (STX) '\x03' # 0x03 -> END OF TEXT (ETX) '\x9c' # 0x04 -> STRING TERMINATOR (ST) '\t' # 0x05 -> CHARACTER TABULATION (HT) '\x86' # 0x06 -> START OF SELECTED AREA (SSA) '\x7f' # 0x07 -> DELETE (DEL) '\x97' # 0x08 -> END OF GUARDED AREA (EPA) '\x8d' # 0x09 -> REVERSE LINE FEED (RI) '\x8e' # 0x0A -> SINGLE-SHIFT TWO (SS2) '\x0b' # 0x0B -> LINE TABULATION (VT) '\x0c' # 0x0C -> FORM FEED (FF) '\r' # 0x0D -> CARRIAGE RETURN (CR) '\x0e' # 0x0E -> SHIFT OUT (SO) '\x0f' # 0x0F -> SHIFT IN (SI) '\x10' # 0x10 -> DATALINK ESCAPE (DLE) '\x11' # 0x11 -> DEVICE CONTROL ONE (DC1) '\x12' # 0x12 -> DEVICE CONTROL TWO (DC2) '\x13' # 0x13 -> DEVICE CONTROL THREE (DC3) '\x9d' # 0x14 -> OPERATING SYSTEM COMMAND (OSC) '\x85' # 0x15 -> NEXT LINE (NEL) '\x08' # 0x16 -> BACKSPACE (BS) '\x87' # 0x17 -> END OF SELECTED AREA (ESA) '\x18' # 0x18 -> CANCEL (CAN) '\x19' # 0x19 -> END OF MEDIUM (EM) '\x92' # 0x1A -> PRIVATE USE TWO (PU2) '\x8f' # 0x1B -> SINGLE-SHIFT THREE (SS3) '\x1c' # 0x1C -> FILE SEPARATOR (IS4) '\x1d' # 0x1D -> GROUP SEPARATOR (IS3) '\x1e' # 0x1E -> RECORD SEPARATOR (IS2) '\x1f' # 0x1F -> UNIT SEPARATOR (IS1) '\x80' # 0x20 -> PADDING CHARACTER (PAD) '\x81' # 0x21 -> HIGH OCTET PRESET (HOP) '\x82' # 0x22 -> BREAK PERMITTED HERE (BPH) '\x83' # 0x23 -> NO BREAK HERE (NBH) '\x84' # 0x24 -> INDEX (IND) '\n' # 0x25 -> LINE FEED (LF) '\x17' # 0x26 -> END OF TRANSMISSION BLOCK (ETB) '\x1b' # 0x27 -> ESCAPE (ESC) '\x88' # 0x28 -> CHARACTER TABULATION SET (HTS) '\x89' # 0x29 -> CHARACTER TABULATION WITH JUSTIFICATION (HTJ) '\x8a' # 0x2A -> LINE TABULATION SET (VTS) '\x8b' # 0x2B -> PARTIAL LINE FORWARD (PLD) '\x8c' # 0x2C -> PARTIAL LINE BACKWARD (PLU) '\x05' # 0x2D -> ENQUIRY (ENQ) '\x06' # 0x2E -> ACKNOWLEDGE (ACK) '\x07' # 0x2F -> BELL (BEL) '\x90' # 0x30 -> DEVICE CONTROL STRING (DCS) '\x91' # 0x31 -> PRIVATE USE ONE (PU1) '\x16' # 0x32 -> SYNCHRONOUS IDLE (SYN) '\x93' # 0x33 -> SET TRANSMIT STATE (STS) '\x94' # 0x34 -> CANCEL CHARACTER (CCH) '\x95' # 0x35 -> MESSAGE WAITING (MW) '\x96' # 0x36 -> START OF GUARDED AREA (SPA) '\x04' # 0x37 -> END OF TRANSMISSION (EOT) '\x98' # 0x38 -> START OF STRING (SOS) '\x99' # 0x39 -> SINGLE GRAPHIC CHARACTER INTRODUCER (SGCI) '\x9a' # 0x3A -> SINGLE CHARACTER INTRODUCER (SCI) '\x9b' # 0x3B -> CONTROL SEQUENCE INTRODUCER (CSI) '\x14' # 0x3C -> DEVICE CONTROL FOUR (DC4) '\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE (NAK) '\x9e' # 0x3E -> PRIVACY MESSAGE (PM) '\x1a' # 0x3F -> SUBSTITUTE (SUB) ' ' # 0x40 -> SPACE '\xa0' # 0x41 -> NO-BREAK SPACE '\xe2' # 0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX '{' # 0x43 -> LEFT CURLY BRACKET '\xe0' # 0x44 -> LATIN SMALL LETTER A WITH GRAVE '\xe1' # 0x45 -> LATIN SMALL LETTER A WITH ACUTE '\xe3' # 0x46 -> LATIN SMALL LETTER A WITH TILDE '\xe5' # 0x47 -> LATIN SMALL LETTER A WITH RING ABOVE '\xe7' # 0x48 -> LATIN SMALL LETTER C WITH CEDILLA '\xf1' # 0x49 -> LATIN SMALL LETTER N WITH TILDE '\xc4' # 0x4A -> LATIN CAPITAL LETTER A WITH DIAERESIS '.' # 0x4B -> FULL STOP '<' # 0x4C -> LESS-THAN SIGN '(' # 0x4D -> LEFT PARENTHESIS '+' # 0x4E -> PLUS SIGN '!' # 0x4F -> EXCLAMATION MARK '&' # 0x50 -> AMPERSAND '\xe9' # 0x51 -> LATIN SMALL LETTER E WITH ACUTE '\xea' # 0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX '\xeb' # 0x53 -> LATIN SMALL LETTER E WITH DIAERESIS '\xe8' # 0x54 -> LATIN SMALL LETTER E WITH GRAVE '\xed' # 0x55 -> LATIN SMALL LETTER I WITH ACUTE '\xee' # 0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX '\xef' # 0x57 -> LATIN SMALL LETTER I WITH DIAERESIS '\xec' # 0x58 -> LATIN SMALL LETTER I WITH GRAVE '~' # 0x59 -> TILDE '\xdc' # 0x5A -> LATIN CAPITAL LETTER U WITH DIAERESIS '$' # 0x5B -> DOLLAR SIGN '*' # 0x5C -> ASTERISK ')' # 0x5D -> RIGHT PARENTHESIS ';' # 0x5E -> SEMICOLON '^' # 0x5F -> CIRCUMFLEX ACCENT '-' # 0x60 -> HYPHEN-MINUS '/' # 0x61 -> SOLIDUS '\xc2' # 0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX '[' # 0x63 -> LEFT SQUARE BRACKET '\xc0' # 0x64 -> LATIN CAPITAL LETTER A WITH GRAVE '\xc1' # 0x65 -> LATIN CAPITAL LETTER A WITH ACUTE '\xc3' # 0x66 -> LATIN CAPITAL LETTER A WITH TILDE '\xc5' # 0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE '\xc7' # 0x68 -> LATIN CAPITAL LETTER C WITH CEDILLA '\xd1' # 0x69 -> LATIN CAPITAL LETTER N WITH TILDE '\xf6' # 0x6A -> LATIN SMALL LETTER O WITH DIAERESIS ',' # 0x6B -> COMMA '%' # 0x6C -> PERCENT SIGN '_' # 0x6D -> LOW LINE '>' # 0x6E -> GREATER-THAN SIGN '?' # 0x6F -> QUESTION MARK '\xf8' # 0x70 -> LATIN SMALL LETTER O WITH STROKE '\xc9' # 0x71 -> LATIN CAPITAL LETTER E WITH ACUTE '\xca' # 0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX '\xcb' # 0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS '\xc8' # 0x74 -> LATIN CAPITAL LETTER E WITH GRAVE '\xcd' # 0x75 -> LATIN CAPITAL LETTER I WITH ACUTE '\xce' # 0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX '\xcf' # 0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS '\xcc' # 0x78 -> LATIN CAPITAL LETTER I WITH GRAVE '`' # 0x79 -> GRAVE ACCENT ':' # 0x7A -> COLON '#' # 0x7B -> NUMBER SIGN '\xa7' # 0x7C -> SECTION SIGN "'" # 0x7D -> APOSTROPHE '=' # 0x7E -> EQUALS SIGN '"' # 0x7F -> QUOTATION MARK '\xd8' # 0x80 -> LATIN CAPITAL LETTER O WITH STROKE 'a' # 0x81 -> LATIN SMALL LETTER A 'b' # 0x82 -> LATIN SMALL LETTER B 'c' # 0x83 -> LATIN SMALL LETTER C 'd' # 0x84 -> LATIN SMALL LETTER D 'e' # 0x85 -> LATIN SMALL LETTER E 'f' # 0x86 -> LATIN SMALL LETTER F 'g' # 0x87 -> LATIN SMALL LETTER G 'h' # 0x88 -> LATIN SMALL LETTER H 'i' # 0x89 -> LATIN SMALL LETTER I '\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK '\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK '\xf0' # 0x8C -> LATIN SMALL LETTER ETH (Icelandic) '\xfd' # 0x8D -> LATIN SMALL LETTER Y WITH ACUTE '\xfe' # 0x8E -> LATIN SMALL LETTER THORN (Icelandic) '\xb1' # 0x8F -> PLUS-MINUS SIGN '\xb0' # 0x90 -> DEGREE SIGN 'j' # 0x91 -> LATIN SMALL LETTER J 'k' # 0x92 -> LATIN SMALL LETTER K 'l' # 0x93 -> LATIN SMALL LETTER L 'm' # 0x94 -> LATIN SMALL LETTER M 'n' # 0x95 -> LATIN SMALL LETTER N 'o' # 0x96 -> LATIN SMALL LETTER O 'p' # 0x97 -> LATIN SMALL LETTER P 'q' # 0x98 -> LATIN SMALL LETTER Q 'r' # 0x99 -> LATIN SMALL LETTER R '\xaa' # 0x9A -> FEMININE ORDINAL INDICATOR '\xba' # 0x9B -> MASCULINE ORDINAL INDICATOR '\xe6' # 0x9C -> LATIN SMALL LETTER AE '\xb8' # 0x9D -> CEDILLA '\xc6' # 0x9E -> LATIN CAPITAL LETTER AE '\xa4' # 0x9F -> CURRENCY SIGN '\xb5' # 0xA0 -> MICRO SIGN '\xdf' # 0xA1 -> LATIN SMALL LETTER SHARP S (German) 's' # 0xA2 -> LATIN SMALL LETTER S 't' # 0xA3 -> LATIN SMALL LETTER T 'u' # 0xA4 -> LATIN SMALL LETTER U 'v' # 0xA5 -> LATIN SMALL LETTER V 'w' # 0xA6 -> LATIN SMALL LETTER W 'x' # 0xA7 -> LATIN SMALL LETTER X 'y' # 0xA8 -> LATIN SMALL LETTER Y 'z' # 0xA9 -> LATIN SMALL LETTER Z '\xa1' # 0xAA -> INVERTED EXCLAMATION MARK '\xbf' # 0xAB -> INVERTED QUESTION MARK '\xd0' # 0xAC -> LATIN CAPITAL LETTER ETH (Icelandic) '\xdd' # 0xAD -> LATIN CAPITAL LETTER Y WITH ACUTE '\xde' # 0xAE -> LATIN CAPITAL LETTER THORN (Icelandic) '\xae' # 0xAF -> REGISTERED SIGN '\xa2' # 0xB0 -> CENT SIGN '\xa3' # 0xB1 -> POUND SIGN '\xa5' # 0xB2 -> YEN SIGN '\xb7' # 0xB3 -> MIDDLE DOT '\xa9' # 0xB4 -> COPYRIGHT SIGN '@' # 0xB5 -> COMMERCIAL AT '\xb6' # 0xB6 -> PILCROW SIGN '\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER '\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF '\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS '\xac' # 0xBA -> NOT SIGN '|' # 0xBB -> VERTICAL LINE '\u203e' # 0xBC -> OVERLINE '\xa8' # 0xBD -> DIAERESIS '\xb4' # 0xBE -> ACUTE ACCENT '\xd7' # 0xBF -> MULTIPLICATION SIGN '\xe4' # 0xC0 -> LATIN SMALL LETTER A WITH DIAERESIS 'A' # 0xC1 -> LATIN CAPITAL LETTER A 'B' # 0xC2 -> LATIN CAPITAL LETTER B 'C' # 0xC3 -> LATIN CAPITAL LETTER C 'D' # 0xC4 -> LATIN CAPITAL LETTER D 'E' # 0xC5 -> LATIN CAPITAL LETTER E 'F' # 0xC6 -> LATIN CAPITAL LETTER F 'G' # 0xC7 -> LATIN CAPITAL LETTER G 'H' # 0xC8 -> LATIN CAPITAL LETTER H 'I' # 0xC9 -> LATIN CAPITAL LETTER I '\xad' # 0xCA -> SOFT HYPHEN '\xf4' # 0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX '\xa6' # 0xCC -> BROKEN BAR '\xf2' # 0xCD -> LATIN SMALL LETTER O WITH GRAVE '\xf3' # 0xCE -> LATIN SMALL LETTER O WITH ACUTE '\xf5' # 0xCF -> LATIN SMALL LETTER O WITH TILDE '\xfc' # 0xD0 -> LATIN SMALL LETTER U WITH DIAERESIS 'J' # 0xD1 -> LATIN CAPITAL LETTER J 'K' # 0xD2 -> LATIN CAPITAL LETTER K 'L' # 0xD3 -> LATIN CAPITAL LETTER L 'M' # 0xD4 -> LATIN CAPITAL LETTER M 'N' # 0xD5 -> LATIN CAPITAL LETTER N 'O' # 0xD6 -> LATIN CAPITAL LETTER O 'P' # 0xD7 -> LATIN CAPITAL LETTER P 'Q' # 0xD8 -> LATIN CAPITAL LETTER Q 'R' # 0xD9 -> LATIN CAPITAL LETTER R '\xb9' # 0xDA -> SUPERSCRIPT ONE '\xfb' # 0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX '}' # 0xDC -> RIGHT CURLY BRACKET '\xf9' # 0xDD -> LATIN SMALL LETTER U WITH GRAVE '\xfa' # 0xDE -> LATIN SMALL LETTER U WITH ACUTE '\xff' # 0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS '\xd6' # 0xE0 -> LATIN CAPITAL LETTER O WITH DIAERESIS '\xf7' # 0xE1 -> DIVISION SIGN 'S' # 0xE2 -> LATIN CAPITAL LETTER S 'T' # 0xE3 -> LATIN CAPITAL LETTER T 'U' # 0xE4 -> LATIN CAPITAL LETTER U 'V' # 0xE5 -> LATIN CAPITAL LETTER V 'W' # 0xE6 -> LATIN CAPITAL LETTER W 'X' # 0xE7 -> LATIN CAPITAL LETTER X 'Y' # 0xE8 -> LATIN CAPITAL LETTER Y 'Z' # 0xE9 -> LATIN CAPITAL LETTER Z '\xb2' # 0xEA -> SUPERSCRIPT TWO '\xd4' # 0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX '\\' # 0xEC -> REVERSE SOLIDUS '\xd2' # 0xED -> LATIN CAPITAL LETTER O WITH GRAVE '\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE '\xd5' # 0xEF -> LATIN CAPITAL LETTER O WITH TILDE '0' # 0xF0 -> DIGIT ZERO '1' # 0xF1 -> DIGIT ONE '2' # 0xF2 -> DIGIT TWO '3' # 0xF3 -> DIGIT THREE '4' # 0xF4 -> DIGIT FOUR '5' # 0xF5 -> DIGIT FIVE '6' # 0xF6 -> DIGIT SIX '7' # 0xF7 -> DIGIT SEVEN '8' # 0xF8 -> DIGIT EIGHT '9' # 0xF9 -> DIGIT NINE '\xb3' # 0xFA -> SUPERSCRIPT THREE '\xdb' # 0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX ']' # 0xFC -> RIGHT SQUARE BRACKET '\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE '\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE '\x9f' # 0xFF -> APPLICATION PROGRAM COMMAND (APC) ) ### Encoding table encoding_table=codecs.charmap_build(decoding_table) cp855.py 0000644 00000102472 15053241622 0005770 0 ustar 00 """ Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP855.TXT' with gencodec.py. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.charmap_encode(input,self.errors,encoding_map)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.charmap_decode(input,self.errors,decoding_table)[0] class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='cp855', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x0452, # CYRILLIC SMALL LETTER DJE 0x0081: 0x0402, # CYRILLIC CAPITAL LETTER DJE 0x0082: 0x0453, # CYRILLIC SMALL LETTER GJE 0x0083: 0x0403, # CYRILLIC CAPITAL LETTER GJE 0x0084: 0x0451, # CYRILLIC SMALL LETTER IO 0x0085: 0x0401, # CYRILLIC CAPITAL LETTER IO 0x0086: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE 0x0087: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE 0x0088: 0x0455, # CYRILLIC SMALL LETTER DZE 0x0089: 0x0405, # CYRILLIC CAPITAL LETTER DZE 0x008a: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I 0x008b: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I 0x008c: 0x0457, # CYRILLIC SMALL LETTER YI 0x008d: 0x0407, # CYRILLIC CAPITAL LETTER YI 0x008e: 0x0458, # CYRILLIC SMALL LETTER JE 0x008f: 0x0408, # CYRILLIC CAPITAL LETTER JE 0x0090: 0x0459, # CYRILLIC SMALL LETTER LJE 0x0091: 0x0409, # CYRILLIC CAPITAL LETTER LJE 0x0092: 0x045a, # CYRILLIC SMALL LETTER NJE 0x0093: 0x040a, # CYRILLIC CAPITAL LETTER NJE 0x0094: 0x045b, # CYRILLIC SMALL LETTER TSHE 0x0095: 0x040b, # CYRILLIC CAPITAL LETTER TSHE 0x0096: 0x045c, # CYRILLIC SMALL LETTER KJE 0x0097: 0x040c, # CYRILLIC CAPITAL LETTER KJE 0x0098: 0x045e, # CYRILLIC SMALL LETTER SHORT U 0x0099: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U 0x009a: 0x045f, # CYRILLIC SMALL LETTER DZHE 0x009b: 0x040f, # CYRILLIC CAPITAL LETTER DZHE 0x009c: 0x044e, # CYRILLIC SMALL LETTER YU 0x009d: 0x042e, # CYRILLIC CAPITAL LETTER YU 0x009e: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN 0x009f: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN 0x00a0: 0x0430, # CYRILLIC SMALL LETTER A 0x00a1: 0x0410, # CYRILLIC CAPITAL LETTER A 0x00a2: 0x0431, # CYRILLIC SMALL LETTER BE 0x00a3: 0x0411, # CYRILLIC CAPITAL LETTER BE 0x00a4: 0x0446, # CYRILLIC SMALL LETTER TSE 0x00a5: 0x0426, # CYRILLIC CAPITAL LETTER TSE 0x00a6: 0x0434, # CYRILLIC SMALL LETTER DE 0x00a7: 0x0414, # CYRILLIC CAPITAL LETTER DE 0x00a8: 0x0435, # CYRILLIC SMALL LETTER IE 0x00a9: 0x0415, # CYRILLIC CAPITAL LETTER IE 0x00aa: 0x0444, # CYRILLIC SMALL LETTER EF 0x00ab: 0x0424, # CYRILLIC CAPITAL LETTER EF 0x00ac: 0x0433, # CYRILLIC SMALL LETTER GHE 0x00ad: 0x0413, # CYRILLIC CAPITAL LETTER GHE 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00b0: 0x2591, # LIGHT SHADE 0x00b1: 0x2592, # MEDIUM SHADE 0x00b2: 0x2593, # DARK SHADE 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT 0x00b5: 0x0445, # CYRILLIC SMALL LETTER HA 0x00b6: 0x0425, # CYRILLIC CAPITAL LETTER HA 0x00b7: 0x0438, # CYRILLIC SMALL LETTER I 0x00b8: 0x0418, # CYRILLIC CAPITAL LETTER I 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT 0x00bd: 0x0439, # CYRILLIC SMALL LETTER SHORT I 0x00be: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL 0x00c6: 0x043a, # CYRILLIC SMALL LETTER KA 0x00c7: 0x041a, # CYRILLIC CAPITAL LETTER KA 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL 0x00cf: 0x00a4, # CURRENCY SIGN 0x00d0: 0x043b, # CYRILLIC SMALL LETTER EL 0x00d1: 0x041b, # CYRILLIC CAPITAL LETTER EL 0x00d2: 0x043c, # CYRILLIC SMALL LETTER EM 0x00d3: 0x041c, # CYRILLIC CAPITAL LETTER EM 0x00d4: 0x043d, # CYRILLIC SMALL LETTER EN 0x00d5: 0x041d, # CYRILLIC CAPITAL LETTER EN 0x00d6: 0x043e, # CYRILLIC SMALL LETTER O 0x00d7: 0x041e, # CYRILLIC CAPITAL LETTER O 0x00d8: 0x043f, # CYRILLIC SMALL LETTER PE 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT 0x00db: 0x2588, # FULL BLOCK 0x00dc: 0x2584, # LOWER HALF BLOCK 0x00dd: 0x041f, # CYRILLIC CAPITAL LETTER PE 0x00de: 0x044f, # CYRILLIC SMALL LETTER YA 0x00df: 0x2580, # UPPER HALF BLOCK 0x00e0: 0x042f, # CYRILLIC CAPITAL LETTER YA 0x00e1: 0x0440, # CYRILLIC SMALL LETTER ER 0x00e2: 0x0420, # CYRILLIC CAPITAL LETTER ER 0x00e3: 0x0441, # CYRILLIC SMALL LETTER ES 0x00e4: 0x0421, # CYRILLIC CAPITAL LETTER ES 0x00e5: 0x0442, # CYRILLIC SMALL LETTER TE 0x00e6: 0x0422, # CYRILLIC CAPITAL LETTER TE 0x00e7: 0x0443, # CYRILLIC SMALL LETTER U 0x00e8: 0x0423, # CYRILLIC CAPITAL LETTER U 0x00e9: 0x0436, # CYRILLIC SMALL LETTER ZHE 0x00ea: 0x0416, # CYRILLIC CAPITAL LETTER ZHE 0x00eb: 0x0432, # CYRILLIC SMALL LETTER VE 0x00ec: 0x0412, # CYRILLIC CAPITAL LETTER VE 0x00ed: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN 0x00ee: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN 0x00ef: 0x2116, # NUMERO SIGN 0x00f0: 0x00ad, # SOFT HYPHEN 0x00f1: 0x044b, # CYRILLIC SMALL LETTER YERU 0x00f2: 0x042b, # CYRILLIC CAPITAL LETTER YERU 0x00f3: 0x0437, # CYRILLIC SMALL LETTER ZE 0x00f4: 0x0417, # CYRILLIC CAPITAL LETTER ZE 0x00f5: 0x0448, # CYRILLIC SMALL LETTER SHA 0x00f6: 0x0428, # CYRILLIC CAPITAL LETTER SHA 0x00f7: 0x044d, # CYRILLIC SMALL LETTER E 0x00f8: 0x042d, # CYRILLIC CAPITAL LETTER E 0x00f9: 0x0449, # CYRILLIC SMALL LETTER SHCHA 0x00fa: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA 0x00fb: 0x0447, # CYRILLIC SMALL LETTER CHE 0x00fc: 0x0427, # CYRILLIC CAPITAL LETTER CHE 0x00fd: 0x00a7, # SECTION SIGN 0x00fe: 0x25a0, # BLACK SQUARE 0x00ff: 0x00a0, # NO-BREAK SPACE }) ### Decoding Table decoding_table = ( u'\x00' # 0x0000 -> NULL u'\x01' # 0x0001 -> START OF HEADING u'\x02' # 0x0002 -> START OF TEXT u'\x03' # 0x0003 -> END OF TEXT u'\x04' # 0x0004 -> END OF TRANSMISSION u'\x05' # 0x0005 -> ENQUIRY u'\x06' # 0x0006 -> ACKNOWLEDGE u'\x07' # 0x0007 -> BELL u'\x08' # 0x0008 -> BACKSPACE u'\t' # 0x0009 -> HORIZONTAL TABULATION u'\n' # 0x000a -> LINE FEED u'\x0b' # 0x000b -> VERTICAL TABULATION u'\x0c' # 0x000c -> FORM FEED u'\r' # 0x000d -> CARRIAGE RETURN u'\x0e' # 0x000e -> SHIFT OUT u'\x0f' # 0x000f -> SHIFT IN u'\x10' # 0x0010 -> DATA LINK ESCAPE u'\x11' # 0x0011 -> DEVICE CONTROL ONE u'\x12' # 0x0012 -> DEVICE CONTROL TWO u'\x13' # 0x0013 -> DEVICE CONTROL THREE u'\x14' # 0x0014 -> DEVICE CONTROL FOUR u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE u'\x16' # 0x0016 -> SYNCHRONOUS IDLE u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK u'\x18' # 0x0018 -> CANCEL u'\x19' # 0x0019 -> END OF MEDIUM u'\x1a' # 0x001a -> SUBSTITUTE u'\x1b' # 0x001b -> ESCAPE u'\x1c' # 0x001c -> FILE SEPARATOR u'\x1d' # 0x001d -> GROUP SEPARATOR u'\x1e' # 0x001e -> RECORD SEPARATOR u'\x1f' # 0x001f -> UNIT SEPARATOR u' ' # 0x0020 -> SPACE u'!' # 0x0021 -> EXCLAMATION MARK u'"' # 0x0022 -> QUOTATION MARK u'#' # 0x0023 -> NUMBER SIGN u'$' # 0x0024 -> DOLLAR SIGN u'%' # 0x0025 -> PERCENT SIGN u'&' # 0x0026 -> AMPERSAND u"'" # 0x0027 -> APOSTROPHE u'(' # 0x0028 -> LEFT PARENTHESIS u')' # 0x0029 -> RIGHT PARENTHESIS u'*' # 0x002a -> ASTERISK u'+' # 0x002b -> PLUS SIGN u',' # 0x002c -> COMMA u'-' # 0x002d -> HYPHEN-MINUS u'.' # 0x002e -> FULL STOP u'/' # 0x002f -> SOLIDUS u'0' # 0x0030 -> DIGIT ZERO u'1' # 0x0031 -> DIGIT ONE u'2' # 0x0032 -> DIGIT TWO u'3' # 0x0033 -> DIGIT THREE u'4' # 0x0034 -> DIGIT FOUR u'5' # 0x0035 -> DIGIT FIVE u'6' # 0x0036 -> DIGIT SIX u'7' # 0x0037 -> DIGIT SEVEN u'8' # 0x0038 -> DIGIT EIGHT u'9' # 0x0039 -> DIGIT NINE u':' # 0x003a -> COLON u';' # 0x003b -> SEMICOLON u'<' # 0x003c -> LESS-THAN SIGN u'=' # 0x003d -> EQUALS SIGN u'>' # 0x003e -> GREATER-THAN SIGN u'?' # 0x003f -> QUESTION MARK u'@' # 0x0040 -> COMMERCIAL AT u'A' # 0x0041 -> LATIN CAPITAL LETTER A u'B' # 0x0042 -> LATIN CAPITAL LETTER B u'C' # 0x0043 -> LATIN CAPITAL LETTER C u'D' # 0x0044 -> LATIN CAPITAL LETTER D u'E' # 0x0045 -> LATIN CAPITAL LETTER E u'F' # 0x0046 -> LATIN CAPITAL LETTER F u'G' # 0x0047 -> LATIN CAPITAL LETTER G u'H' # 0x0048 -> LATIN CAPITAL LETTER H u'I' # 0x0049 -> LATIN CAPITAL LETTER I u'J' # 0x004a -> LATIN CAPITAL LETTER J u'K' # 0x004b -> LATIN CAPITAL LETTER K u'L' # 0x004c -> LATIN CAPITAL LETTER L u'M' # 0x004d -> LATIN CAPITAL LETTER M u'N' # 0x004e -> LATIN CAPITAL LETTER N u'O' # 0x004f -> LATIN CAPITAL LETTER O u'P' # 0x0050 -> LATIN CAPITAL LETTER P u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q u'R' # 0x0052 -> LATIN CAPITAL LETTER R u'S' # 0x0053 -> LATIN CAPITAL LETTER S u'T' # 0x0054 -> LATIN CAPITAL LETTER T u'U' # 0x0055 -> LATIN CAPITAL LETTER U u'V' # 0x0056 -> LATIN CAPITAL LETTER V u'W' # 0x0057 -> LATIN CAPITAL LETTER W u'X' # 0x0058 -> LATIN CAPITAL LETTER X u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y u'Z' # 0x005a -> LATIN CAPITAL LETTER Z u'[' # 0x005b -> LEFT SQUARE BRACKET u'\\' # 0x005c -> REVERSE SOLIDUS u']' # 0x005d -> RIGHT SQUARE BRACKET u'^' # 0x005e -> CIRCUMFLEX ACCENT u'_' # 0x005f -> LOW LINE u'`' # 0x0060 -> GRAVE ACCENT u'a' # 0x0061 -> LATIN SMALL LETTER A u'b' # 0x0062 -> LATIN SMALL LETTER B u'c' # 0x0063 -> LATIN SMALL LETTER C u'd' # 0x0064 -> LATIN SMALL LETTER D u'e' # 0x0065 -> LATIN SMALL LETTER E u'f' # 0x0066 -> LATIN SMALL LETTER F u'g' # 0x0067 -> LATIN SMALL LETTER G u'h' # 0x0068 -> LATIN SMALL LETTER H u'i' # 0x0069 -> LATIN SMALL LETTER I u'j' # 0x006a -> LATIN SMALL LETTER J u'k' # 0x006b -> LATIN SMALL LETTER K u'l' # 0x006c -> LATIN SMALL LETTER L u'm' # 0x006d -> LATIN SMALL LETTER M u'n' # 0x006e -> LATIN SMALL LETTER N u'o' # 0x006f -> LATIN SMALL LETTER O u'p' # 0x0070 -> LATIN SMALL LETTER P u'q' # 0x0071 -> LATIN SMALL LETTER Q u'r' # 0x0072 -> LATIN SMALL LETTER R u's' # 0x0073 -> LATIN SMALL LETTER S u't' # 0x0074 -> LATIN SMALL LETTER T u'u' # 0x0075 -> LATIN SMALL LETTER U u'v' # 0x0076 -> LATIN SMALL LETTER V u'w' # 0x0077 -> LATIN SMALL LETTER W u'x' # 0x0078 -> LATIN SMALL LETTER X u'y' # 0x0079 -> LATIN SMALL LETTER Y u'z' # 0x007a -> LATIN SMALL LETTER Z u'{' # 0x007b -> LEFT CURLY BRACKET u'|' # 0x007c -> VERTICAL LINE u'}' # 0x007d -> RIGHT CURLY BRACKET u'~' # 0x007e -> TILDE u'\x7f' # 0x007f -> DELETE u'\u0452' # 0x0080 -> CYRILLIC SMALL LETTER DJE u'\u0402' # 0x0081 -> CYRILLIC CAPITAL LETTER DJE u'\u0453' # 0x0082 -> CYRILLIC SMALL LETTER GJE u'\u0403' # 0x0083 -> CYRILLIC CAPITAL LETTER GJE u'\u0451' # 0x0084 -> CYRILLIC SMALL LETTER IO u'\u0401' # 0x0085 -> CYRILLIC CAPITAL LETTER IO u'\u0454' # 0x0086 -> CYRILLIC SMALL LETTER UKRAINIAN IE u'\u0404' # 0x0087 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE u'\u0455' # 0x0088 -> CYRILLIC SMALL LETTER DZE u'\u0405' # 0x0089 -> CYRILLIC CAPITAL LETTER DZE u'\u0456' # 0x008a -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I u'\u0406' # 0x008b -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I u'\u0457' # 0x008c -> CYRILLIC SMALL LETTER YI u'\u0407' # 0x008d -> CYRILLIC CAPITAL LETTER YI u'\u0458' # 0x008e -> CYRILLIC SMALL LETTER JE u'\u0408' # 0x008f -> CYRILLIC CAPITAL LETTER JE u'\u0459' # 0x0090 -> CYRILLIC SMALL LETTER LJE u'\u0409' # 0x0091 -> CYRILLIC CAPITAL LETTER LJE u'\u045a' # 0x0092 -> CYRILLIC SMALL LETTER NJE u'\u040a' # 0x0093 -> CYRILLIC CAPITAL LETTER NJE u'\u045b' # 0x0094 -> CYRILLIC SMALL LETTER TSHE u'\u040b' # 0x0095 -> CYRILLIC CAPITAL LETTER TSHE u'\u045c' # 0x0096 -> CYRILLIC SMALL LETTER KJE u'\u040c' # 0x0097 -> CYRILLIC CAPITAL LETTER KJE u'\u045e' # 0x0098 -> CYRILLIC SMALL LETTER SHORT U u'\u040e' # 0x0099 -> CYRILLIC CAPITAL LETTER SHORT U u'\u045f' # 0x009a -> CYRILLIC SMALL LETTER DZHE u'\u040f' # 0x009b -> CYRILLIC CAPITAL LETTER DZHE u'\u044e' # 0x009c -> CYRILLIC SMALL LETTER YU u'\u042e' # 0x009d -> CYRILLIC CAPITAL LETTER YU u'\u044a' # 0x009e -> CYRILLIC SMALL LETTER HARD SIGN u'\u042a' # 0x009f -> CYRILLIC CAPITAL LETTER HARD SIGN u'\u0430' # 0x00a0 -> CYRILLIC SMALL LETTER A u'\u0410' # 0x00a1 -> CYRILLIC CAPITAL LETTER A u'\u0431' # 0x00a2 -> CYRILLIC SMALL LETTER BE u'\u0411' # 0x00a3 -> CYRILLIC CAPITAL LETTER BE u'\u0446' # 0x00a4 -> CYRILLIC SMALL LETTER TSE u'\u0426' # 0x00a5 -> CYRILLIC CAPITAL LETTER TSE u'\u0434' # 0x00a6 -> CYRILLIC SMALL LETTER DE u'\u0414' # 0x00a7 -> CYRILLIC CAPITAL LETTER DE u'\u0435' # 0x00a8 -> CYRILLIC SMALL LETTER IE u'\u0415' # 0x00a9 -> CYRILLIC CAPITAL LETTER IE u'\u0444' # 0x00aa -> CYRILLIC SMALL LETTER EF u'\u0424' # 0x00ab -> CYRILLIC CAPITAL LETTER EF u'\u0433' # 0x00ac -> CYRILLIC SMALL LETTER GHE u'\u0413' # 0x00ad -> CYRILLIC CAPITAL LETTER GHE u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK u'\u2591' # 0x00b0 -> LIGHT SHADE u'\u2592' # 0x00b1 -> MEDIUM SHADE u'\u2593' # 0x00b2 -> DARK SHADE u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT u'\u0445' # 0x00b5 -> CYRILLIC SMALL LETTER HA u'\u0425' # 0x00b6 -> CYRILLIC CAPITAL LETTER HA u'\u0438' # 0x00b7 -> CYRILLIC SMALL LETTER I u'\u0418' # 0x00b8 -> CYRILLIC CAPITAL LETTER I u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT u'\u0439' # 0x00bd -> CYRILLIC SMALL LETTER SHORT I u'\u0419' # 0x00be -> CYRILLIC CAPITAL LETTER SHORT I u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL u'\u043a' # 0x00c6 -> CYRILLIC SMALL LETTER KA u'\u041a' # 0x00c7 -> CYRILLIC CAPITAL LETTER KA u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL u'\xa4' # 0x00cf -> CURRENCY SIGN u'\u043b' # 0x00d0 -> CYRILLIC SMALL LETTER EL u'\u041b' # 0x00d1 -> CYRILLIC CAPITAL LETTER EL u'\u043c' # 0x00d2 -> CYRILLIC SMALL LETTER EM u'\u041c' # 0x00d3 -> CYRILLIC CAPITAL LETTER EM u'\u043d' # 0x00d4 -> CYRILLIC SMALL LETTER EN u'\u041d' # 0x00d5 -> CYRILLIC CAPITAL LETTER EN u'\u043e' # 0x00d6 -> CYRILLIC SMALL LETTER O u'\u041e' # 0x00d7 -> CYRILLIC CAPITAL LETTER O u'\u043f' # 0x00d8 -> CYRILLIC SMALL LETTER PE u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT u'\u2588' # 0x00db -> FULL BLOCK u'\u2584' # 0x00dc -> LOWER HALF BLOCK u'\u041f' # 0x00dd -> CYRILLIC CAPITAL LETTER PE u'\u044f' # 0x00de -> CYRILLIC SMALL LETTER YA u'\u2580' # 0x00df -> UPPER HALF BLOCK u'\u042f' # 0x00e0 -> CYRILLIC CAPITAL LETTER YA u'\u0440' # 0x00e1 -> CYRILLIC SMALL LETTER ER u'\u0420' # 0x00e2 -> CYRILLIC CAPITAL LETTER ER u'\u0441' # 0x00e3 -> CYRILLIC SMALL LETTER ES u'\u0421' # 0x00e4 -> CYRILLIC CAPITAL LETTER ES u'\u0442' # 0x00e5 -> CYRILLIC SMALL LETTER TE u'\u0422' # 0x00e6 -> CYRILLIC CAPITAL LETTER TE u'\u0443' # 0x00e7 -> CYRILLIC SMALL LETTER U u'\u0423' # 0x00e8 -> CYRILLIC CAPITAL LETTER U u'\u0436' # 0x00e9 -> CYRILLIC SMALL LETTER ZHE u'\u0416' # 0x00ea -> CYRILLIC CAPITAL LETTER ZHE u'\u0432' # 0x00eb -> CYRILLIC SMALL LETTER VE u'\u0412' # 0x00ec -> CYRILLIC CAPITAL LETTER VE u'\u044c' # 0x00ed -> CYRILLIC SMALL LETTER SOFT SIGN u'\u042c' # 0x00ee -> CYRILLIC CAPITAL LETTER SOFT SIGN u'\u2116' # 0x00ef -> NUMERO SIGN u'\xad' # 0x00f0 -> SOFT HYPHEN u'\u044b' # 0x00f1 -> CYRILLIC SMALL LETTER YERU u'\u042b' # 0x00f2 -> CYRILLIC CAPITAL LETTER YERU u'\u0437' # 0x00f3 -> CYRILLIC SMALL LETTER ZE u'\u0417' # 0x00f4 -> CYRILLIC CAPITAL LETTER ZE u'\u0448' # 0x00f5 -> CYRILLIC SMALL LETTER SHA u'\u0428' # 0x00f6 -> CYRILLIC CAPITAL LETTER SHA u'\u044d' # 0x00f7 -> CYRILLIC SMALL LETTER E u'\u042d' # 0x00f8 -> CYRILLIC CAPITAL LETTER E u'\u0449' # 0x00f9 -> CYRILLIC SMALL LETTER SHCHA u'\u0429' # 0x00fa -> CYRILLIC CAPITAL LETTER SHCHA u'\u0447' # 0x00fb -> CYRILLIC SMALL LETTER CHE u'\u0427' # 0x00fc -> CYRILLIC CAPITAL LETTER CHE u'\xa7' # 0x00fd -> SECTION SIGN u'\u25a0' # 0x00fe -> BLACK SQUARE u'\xa0' # 0x00ff -> NO-BREAK SPACE ) ### Encoding Map encoding_map = { 0x0000: 0x0000, # NULL 0x0001: 0x0001, # START OF HEADING 0x0002: 0x0002, # START OF TEXT 0x0003: 0x0003, # END OF TEXT 0x0004: 0x0004, # END OF TRANSMISSION 0x0005: 0x0005, # ENQUIRY 0x0006: 0x0006, # ACKNOWLEDGE 0x0007: 0x0007, # BELL 0x0008: 0x0008, # BACKSPACE 0x0009: 0x0009, # HORIZONTAL TABULATION 0x000a: 0x000a, # LINE FEED 0x000b: 0x000b, # VERTICAL TABULATION 0x000c: 0x000c, # FORM FEED 0x000d: 0x000d, # CARRIAGE RETURN 0x000e: 0x000e, # SHIFT OUT 0x000f: 0x000f, # SHIFT IN 0x0010: 0x0010, # DATA LINK ESCAPE 0x0011: 0x0011, # DEVICE CONTROL ONE 0x0012: 0x0012, # DEVICE CONTROL TWO 0x0013: 0x0013, # DEVICE CONTROL THREE 0x0014: 0x0014, # DEVICE CONTROL FOUR 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE 0x0016: 0x0016, # SYNCHRONOUS IDLE 0x0017: 0x0017, # END OF TRANSMISSION BLOCK 0x0018: 0x0018, # CANCEL 0x0019: 0x0019, # END OF MEDIUM 0x001a: 0x001a, # SUBSTITUTE 0x001b: 0x001b, # ESCAPE 0x001c: 0x001c, # FILE SEPARATOR 0x001d: 0x001d, # GROUP SEPARATOR 0x001e: 0x001e, # RECORD SEPARATOR 0x001f: 0x001f, # UNIT SEPARATOR 0x0020: 0x0020, # SPACE 0x0021: 0x0021, # EXCLAMATION MARK 0x0022: 0x0022, # QUOTATION MARK 0x0023: 0x0023, # NUMBER SIGN 0x0024: 0x0024, # DOLLAR SIGN 0x0025: 0x0025, # PERCENT SIGN 0x0026: 0x0026, # AMPERSAND 0x0027: 0x0027, # APOSTROPHE 0x0028: 0x0028, # LEFT PARENTHESIS 0x0029: 0x0029, # RIGHT PARENTHESIS 0x002a: 0x002a, # ASTERISK 0x002b: 0x002b, # PLUS SIGN 0x002c: 0x002c, # COMMA 0x002d: 0x002d, # HYPHEN-MINUS 0x002e: 0x002e, # FULL STOP 0x002f: 0x002f, # SOLIDUS 0x0030: 0x0030, # DIGIT ZERO 0x0031: 0x0031, # DIGIT ONE 0x0032: 0x0032, # DIGIT TWO 0x0033: 0x0033, # DIGIT THREE 0x0034: 0x0034, # DIGIT FOUR 0x0035: 0x0035, # DIGIT FIVE 0x0036: 0x0036, # DIGIT SIX 0x0037: 0x0037, # DIGIT SEVEN 0x0038: 0x0038, # DIGIT EIGHT 0x0039: 0x0039, # DIGIT NINE 0x003a: 0x003a, # COLON 0x003b: 0x003b, # SEMICOLON 0x003c: 0x003c, # LESS-THAN SIGN 0x003d: 0x003d, # EQUALS SIGN 0x003e: 0x003e, # GREATER-THAN SIGN 0x003f: 0x003f, # QUESTION MARK 0x0040: 0x0040, # COMMERCIAL AT 0x0041: 0x0041, # LATIN CAPITAL LETTER A 0x0042: 0x0042, # LATIN CAPITAL LETTER B 0x0043: 0x0043, # LATIN CAPITAL LETTER C 0x0044: 0x0044, # LATIN CAPITAL LETTER D 0x0045: 0x0045, # LATIN CAPITAL LETTER E 0x0046: 0x0046, # LATIN CAPITAL LETTER F 0x0047: 0x0047, # LATIN CAPITAL LETTER G 0x0048: 0x0048, # LATIN CAPITAL LETTER H 0x0049: 0x0049, # LATIN CAPITAL LETTER I 0x004a: 0x004a, # LATIN CAPITAL LETTER J 0x004b: 0x004b, # LATIN CAPITAL LETTER K 0x004c: 0x004c, # LATIN CAPITAL LETTER L 0x004d: 0x004d, # LATIN CAPITAL LETTER M 0x004e: 0x004e, # LATIN CAPITAL LETTER N 0x004f: 0x004f, # LATIN CAPITAL LETTER O 0x0050: 0x0050, # LATIN CAPITAL LETTER P 0x0051: 0x0051, # LATIN CAPITAL LETTER Q 0x0052: 0x0052, # LATIN CAPITAL LETTER R 0x0053: 0x0053, # LATIN CAPITAL LETTER S 0x0054: 0x0054, # LATIN CAPITAL LETTER T 0x0055: 0x0055, # LATIN CAPITAL LETTER U 0x0056: 0x0056, # LATIN CAPITAL LETTER V 0x0057: 0x0057, # LATIN CAPITAL LETTER W 0x0058: 0x0058, # LATIN CAPITAL LETTER X 0x0059: 0x0059, # LATIN CAPITAL LETTER Y 0x005a: 0x005a, # LATIN CAPITAL LETTER Z 0x005b: 0x005b, # LEFT SQUARE BRACKET 0x005c: 0x005c, # REVERSE SOLIDUS 0x005d: 0x005d, # RIGHT SQUARE BRACKET 0x005e: 0x005e, # CIRCUMFLEX ACCENT 0x005f: 0x005f, # LOW LINE 0x0060: 0x0060, # GRAVE ACCENT 0x0061: 0x0061, # LATIN SMALL LETTER A 0x0062: 0x0062, # LATIN SMALL LETTER B 0x0063: 0x0063, # LATIN SMALL LETTER C 0x0064: 0x0064, # LATIN SMALL LETTER D 0x0065: 0x0065, # LATIN SMALL LETTER E 0x0066: 0x0066, # LATIN SMALL LETTER F 0x0067: 0x0067, # LATIN SMALL LETTER G 0x0068: 0x0068, # LATIN SMALL LETTER H 0x0069: 0x0069, # LATIN SMALL LETTER I 0x006a: 0x006a, # LATIN SMALL LETTER J 0x006b: 0x006b, # LATIN SMALL LETTER K 0x006c: 0x006c, # LATIN SMALL LETTER L 0x006d: 0x006d, # LATIN SMALL LETTER M 0x006e: 0x006e, # LATIN SMALL LETTER N 0x006f: 0x006f, # LATIN SMALL LETTER O 0x0070: 0x0070, # LATIN SMALL LETTER P 0x0071: 0x0071, # LATIN SMALL LETTER Q 0x0072: 0x0072, # LATIN SMALL LETTER R 0x0073: 0x0073, # LATIN SMALL LETTER S 0x0074: 0x0074, # LATIN SMALL LETTER T 0x0075: 0x0075, # LATIN SMALL LETTER U 0x0076: 0x0076, # LATIN SMALL LETTER V 0x0077: 0x0077, # LATIN SMALL LETTER W 0x0078: 0x0078, # LATIN SMALL LETTER X 0x0079: 0x0079, # LATIN SMALL LETTER Y 0x007a: 0x007a, # LATIN SMALL LETTER Z 0x007b: 0x007b, # LEFT CURLY BRACKET 0x007c: 0x007c, # VERTICAL LINE 0x007d: 0x007d, # RIGHT CURLY BRACKET 0x007e: 0x007e, # TILDE 0x007f: 0x007f, # DELETE 0x00a0: 0x00ff, # NO-BREAK SPACE 0x00a4: 0x00cf, # CURRENCY SIGN 0x00a7: 0x00fd, # SECTION SIGN 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00ad: 0x00f0, # SOFT HYPHEN 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 0x0401: 0x0085, # CYRILLIC CAPITAL LETTER IO 0x0402: 0x0081, # CYRILLIC CAPITAL LETTER DJE 0x0403: 0x0083, # CYRILLIC CAPITAL LETTER GJE 0x0404: 0x0087, # CYRILLIC CAPITAL LETTER UKRAINIAN IE 0x0405: 0x0089, # CYRILLIC CAPITAL LETTER DZE 0x0406: 0x008b, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I 0x0407: 0x008d, # CYRILLIC CAPITAL LETTER YI 0x0408: 0x008f, # CYRILLIC CAPITAL LETTER JE 0x0409: 0x0091, # CYRILLIC CAPITAL LETTER LJE 0x040a: 0x0093, # CYRILLIC CAPITAL LETTER NJE 0x040b: 0x0095, # CYRILLIC CAPITAL LETTER TSHE 0x040c: 0x0097, # CYRILLIC CAPITAL LETTER KJE 0x040e: 0x0099, # CYRILLIC CAPITAL LETTER SHORT U 0x040f: 0x009b, # CYRILLIC CAPITAL LETTER DZHE 0x0410: 0x00a1, # CYRILLIC CAPITAL LETTER A 0x0411: 0x00a3, # CYRILLIC CAPITAL LETTER BE 0x0412: 0x00ec, # CYRILLIC CAPITAL LETTER VE 0x0413: 0x00ad, # CYRILLIC CAPITAL LETTER GHE 0x0414: 0x00a7, # CYRILLIC CAPITAL LETTER DE 0x0415: 0x00a9, # CYRILLIC CAPITAL LETTER IE 0x0416: 0x00ea, # CYRILLIC CAPITAL LETTER ZHE 0x0417: 0x00f4, # CYRILLIC CAPITAL LETTER ZE 0x0418: 0x00b8, # CYRILLIC CAPITAL LETTER I 0x0419: 0x00be, # CYRILLIC CAPITAL LETTER SHORT I 0x041a: 0x00c7, # CYRILLIC CAPITAL LETTER KA 0x041b: 0x00d1, # CYRILLIC CAPITAL LETTER EL 0x041c: 0x00d3, # CYRILLIC CAPITAL LETTER EM 0x041d: 0x00d5, # CYRILLIC CAPITAL LETTER EN 0x041e: 0x00d7, # CYRILLIC CAPITAL LETTER O 0x041f: 0x00dd, # CYRILLIC CAPITAL LETTER PE 0x0420: 0x00e2, # CYRILLIC CAPITAL LETTER ER 0x0421: 0x00e4, # CYRILLIC CAPITAL LETTER ES 0x0422: 0x00e6, # CYRILLIC CAPITAL LETTER TE 0x0423: 0x00e8, # CYRILLIC CAPITAL LETTER U 0x0424: 0x00ab, # CYRILLIC CAPITAL LETTER EF 0x0425: 0x00b6, # CYRILLIC CAPITAL LETTER HA 0x0426: 0x00a5, # CYRILLIC CAPITAL LETTER TSE 0x0427: 0x00fc, # CYRILLIC CAPITAL LETTER CHE 0x0428: 0x00f6, # CYRILLIC CAPITAL LETTER SHA 0x0429: 0x00fa, # CYRILLIC CAPITAL LETTER SHCHA 0x042a: 0x009f, # CYRILLIC CAPITAL LETTER HARD SIGN 0x042b: 0x00f2, # CYRILLIC CAPITAL LETTER YERU 0x042c: 0x00ee, # CYRILLIC CAPITAL LETTER SOFT SIGN 0x042d: 0x00f8, # CYRILLIC CAPITAL LETTER E 0x042e: 0x009d, # CYRILLIC CAPITAL LETTER YU 0x042f: 0x00e0, # CYRILLIC CAPITAL LETTER YA 0x0430: 0x00a0, # CYRILLIC SMALL LETTER A 0x0431: 0x00a2, # CYRILLIC SMALL LETTER BE 0x0432: 0x00eb, # CYRILLIC SMALL LETTER VE 0x0433: 0x00ac, # CYRILLIC SMALL LETTER GHE 0x0434: 0x00a6, # CYRILLIC SMALL LETTER DE 0x0435: 0x00a8, # CYRILLIC SMALL LETTER IE 0x0436: 0x00e9, # CYRILLIC SMALL LETTER ZHE 0x0437: 0x00f3, # CYRILLIC SMALL LETTER ZE 0x0438: 0x00b7, # CYRILLIC SMALL LETTER I 0x0439: 0x00bd, # CYRILLIC SMALL LETTER SHORT I 0x043a: 0x00c6, # CYRILLIC SMALL LETTER KA 0x043b: 0x00d0, # CYRILLIC SMALL LETTER EL 0x043c: 0x00d2, # CYRILLIC SMALL LETTER EM 0x043d: 0x00d4, # CYRILLIC SMALL LETTER EN 0x043e: 0x00d6, # CYRILLIC SMALL LETTER O 0x043f: 0x00d8, # CYRILLIC SMALL LETTER PE 0x0440: 0x00e1, # CYRILLIC SMALL LETTER ER 0x0441: 0x00e3, # CYRILLIC SMALL LETTER ES 0x0442: 0x00e5, # CYRILLIC SMALL LETTER TE 0x0443: 0x00e7, # CYRILLIC SMALL LETTER U 0x0444: 0x00aa, # CYRILLIC SMALL LETTER EF 0x0445: 0x00b5, # CYRILLIC SMALL LETTER HA 0x0446: 0x00a4, # CYRILLIC SMALL LETTER TSE 0x0447: 0x00fb, # CYRILLIC SMALL LETTER CHE 0x0448: 0x00f5, # CYRILLIC SMALL LETTER SHA 0x0449: 0x00f9, # CYRILLIC SMALL LETTER SHCHA 0x044a: 0x009e, # CYRILLIC SMALL LETTER HARD SIGN 0x044b: 0x00f1, # CYRILLIC SMALL LETTER YERU 0x044c: 0x00ed, # CYRILLIC SMALL LETTER SOFT SIGN 0x044d: 0x00f7, # CYRILLIC SMALL LETTER E 0x044e: 0x009c, # CYRILLIC SMALL LETTER YU 0x044f: 0x00de, # CYRILLIC SMALL LETTER YA 0x0451: 0x0084, # CYRILLIC SMALL LETTER IO 0x0452: 0x0080, # CYRILLIC SMALL LETTER DJE 0x0453: 0x0082, # CYRILLIC SMALL LETTER GJE 0x0454: 0x0086, # CYRILLIC SMALL LETTER UKRAINIAN IE 0x0455: 0x0088, # CYRILLIC SMALL LETTER DZE 0x0456: 0x008a, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I 0x0457: 0x008c, # CYRILLIC SMALL LETTER YI 0x0458: 0x008e, # CYRILLIC SMALL LETTER JE 0x0459: 0x0090, # CYRILLIC SMALL LETTER LJE 0x045a: 0x0092, # CYRILLIC SMALL LETTER NJE 0x045b: 0x0094, # CYRILLIC SMALL LETTER TSHE 0x045c: 0x0096, # CYRILLIC SMALL LETTER KJE 0x045e: 0x0098, # CYRILLIC SMALL LETTER SHORT U 0x045f: 0x009a, # CYRILLIC SMALL LETTER DZHE 0x2116: 0x00ef, # NUMERO SIGN 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL 0x2580: 0x00df, # UPPER HALF BLOCK 0x2584: 0x00dc, # LOWER HALF BLOCK 0x2588: 0x00db, # FULL BLOCK 0x2591: 0x00b0, # LIGHT SHADE 0x2592: 0x00b1, # MEDIUM SHADE 0x2593: 0x00b2, # DARK SHADE 0x25a0: 0x00fe, # BLACK SQUARE } tis_620.py 0000644 00000030414 15053241622 0006306 0 ustar 00 """ Python Character Mapping Codec tis_620 generated from 'python-mappings/TIS-620.TXT' with gencodec.py. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.charmap_decode(input,self.errors,decoding_table)[0] class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='tis-620', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) ### Decoding Table decoding_table = ( u'\x00' # 0x00 -> NULL u'\x01' # 0x01 -> START OF HEADING u'\x02' # 0x02 -> START OF TEXT u'\x03' # 0x03 -> END OF TEXT u'\x04' # 0x04 -> END OF TRANSMISSION u'\x05' # 0x05 -> ENQUIRY u'\x06' # 0x06 -> ACKNOWLEDGE u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION u'\n' # 0x0A -> LINE FEED u'\x0b' # 0x0B -> VERTICAL TABULATION u'\x0c' # 0x0C -> FORM FEED u'\r' # 0x0D -> CARRIAGE RETURN u'\x0e' # 0x0E -> SHIFT OUT u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO u'\x13' # 0x13 -> DEVICE CONTROL THREE u'\x14' # 0x14 -> DEVICE CONTROL FOUR u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE u'\x16' # 0x16 -> SYNCHRONOUS IDLE u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM u'\x1a' # 0x1A -> SUBSTITUTE u'\x1b' # 0x1B -> ESCAPE u'\x1c' # 0x1C -> FILE SEPARATOR u'\x1d' # 0x1D -> GROUP SEPARATOR u'\x1e' # 0x1E -> RECORD SEPARATOR u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK u'#' # 0x23 -> NUMBER SIGN u'$' # 0x24 -> DOLLAR SIGN u'%' # 0x25 -> PERCENT SIGN u'&' # 0x26 -> AMPERSAND u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS u'*' # 0x2A -> ASTERISK u'+' # 0x2B -> PLUS SIGN u',' # 0x2C -> COMMA u'-' # 0x2D -> HYPHEN-MINUS u'.' # 0x2E -> FULL STOP u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO u'3' # 0x33 -> DIGIT THREE u'4' # 0x34 -> DIGIT FOUR u'5' # 0x35 -> DIGIT FIVE u'6' # 0x36 -> DIGIT SIX u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE u':' # 0x3A -> COLON u';' # 0x3B -> SEMICOLON u'<' # 0x3C -> LESS-THAN SIGN u'=' # 0x3D -> EQUALS SIGN u'>' # 0x3E -> GREATER-THAN SIGN u'?' # 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B u'C' # 0x43 -> LATIN CAPITAL LETTER C u'D' # 0x44 -> LATIN CAPITAL LETTER D u'E' # 0x45 -> LATIN CAPITAL LETTER E u'F' # 0x46 -> LATIN CAPITAL LETTER F u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I u'J' # 0x4A -> LATIN CAPITAL LETTER J u'K' # 0x4B -> LATIN CAPITAL LETTER K u'L' # 0x4C -> LATIN CAPITAL LETTER L u'M' # 0x4D -> LATIN CAPITAL LETTER M u'N' # 0x4E -> LATIN CAPITAL LETTER N u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R u'S' # 0x53 -> LATIN CAPITAL LETTER S u'T' # 0x54 -> LATIN CAPITAL LETTER T u'U' # 0x55 -> LATIN CAPITAL LETTER U u'V' # 0x56 -> LATIN CAPITAL LETTER V u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y u'Z' # 0x5A -> LATIN CAPITAL LETTER Z u'[' # 0x5B -> LEFT SQUARE BRACKET u'\\' # 0x5C -> REVERSE SOLIDUS u']' # 0x5D -> RIGHT SQUARE BRACKET u'^' # 0x5E -> CIRCUMFLEX ACCENT u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B u'c' # 0x63 -> LATIN SMALL LETTER C u'd' # 0x64 -> LATIN SMALL LETTER D u'e' # 0x65 -> LATIN SMALL LETTER E u'f' # 0x66 -> LATIN SMALL LETTER F u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I u'j' # 0x6A -> LATIN SMALL LETTER J u'k' # 0x6B -> LATIN SMALL LETTER K u'l' # 0x6C -> LATIN SMALL LETTER L u'm' # 0x6D -> LATIN SMALL LETTER M u'n' # 0x6E -> LATIN SMALL LETTER N u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R u's' # 0x73 -> LATIN SMALL LETTER S u't' # 0x74 -> LATIN SMALL LETTER T u'u' # 0x75 -> LATIN SMALL LETTER U u'v' # 0x76 -> LATIN SMALL LETTER V u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y u'z' # 0x7A -> LATIN SMALL LETTER Z u'{' # 0x7B -> LEFT CURLY BRACKET u'|' # 0x7C -> VERTICAL LINE u'}' # 0x7D -> RIGHT CURLY BRACKET u'~' # 0x7E -> TILDE u'\x7f' # 0x7F -> DELETE u'\x80' # 0x80 -> <control> u'\x81' # 0x81 -> <control> u'\x82' # 0x82 -> <control> u'\x83' # 0x83 -> <control> u'\x84' # 0x84 -> <control> u'\x85' # 0x85 -> <control> u'\x86' # 0x86 -> <control> u'\x87' # 0x87 -> <control> u'\x88' # 0x88 -> <control> u'\x89' # 0x89 -> <control> u'\x8a' # 0x8A -> <control> u'\x8b' # 0x8B -> <control> u'\x8c' # 0x8C -> <control> u'\x8d' # 0x8D -> <control> u'\x8e' # 0x8E -> <control> u'\x8f' # 0x8F -> <control> u'\x90' # 0x90 -> <control> u'\x91' # 0x91 -> <control> u'\x92' # 0x92 -> <control> u'\x93' # 0x93 -> <control> u'\x94' # 0x94 -> <control> u'\x95' # 0x95 -> <control> u'\x96' # 0x96 -> <control> u'\x97' # 0x97 -> <control> u'\x98' # 0x98 -> <control> u'\x99' # 0x99 -> <control> u'\x9a' # 0x9A -> <control> u'\x9b' # 0x9B -> <control> u'\x9c' # 0x9C -> <control> u'\x9d' # 0x9D -> <control> u'\x9e' # 0x9E -> <control> u'\x9f' # 0x9F -> <control> u'\ufffe' u'\u0e01' # 0xA1 -> THAI CHARACTER KO KAI u'\u0e02' # 0xA2 -> THAI CHARACTER KHO KHAI u'\u0e03' # 0xA3 -> THAI CHARACTER KHO KHUAT u'\u0e04' # 0xA4 -> THAI CHARACTER KHO KHWAI u'\u0e05' # 0xA5 -> THAI CHARACTER KHO KHON u'\u0e06' # 0xA6 -> THAI CHARACTER KHO RAKHANG u'\u0e07' # 0xA7 -> THAI CHARACTER NGO NGU u'\u0e08' # 0xA8 -> THAI CHARACTER CHO CHAN u'\u0e09' # 0xA9 -> THAI CHARACTER CHO CHING u'\u0e0a' # 0xAA -> THAI CHARACTER CHO CHANG u'\u0e0b' # 0xAB -> THAI CHARACTER SO SO u'\u0e0c' # 0xAC -> THAI CHARACTER CHO CHOE u'\u0e0d' # 0xAD -> THAI CHARACTER YO YING u'\u0e0e' # 0xAE -> THAI CHARACTER DO CHADA u'\u0e0f' # 0xAF -> THAI CHARACTER TO PATAK u'\u0e10' # 0xB0 -> THAI CHARACTER THO THAN u'\u0e11' # 0xB1 -> THAI CHARACTER THO NANGMONTHO u'\u0e12' # 0xB2 -> THAI CHARACTER THO PHUTHAO u'\u0e13' # 0xB3 -> THAI CHARACTER NO NEN u'\u0e14' # 0xB4 -> THAI CHARACTER DO DEK u'\u0e15' # 0xB5 -> THAI CHARACTER TO TAO u'\u0e16' # 0xB6 -> THAI CHARACTER THO THUNG u'\u0e17' # 0xB7 -> THAI CHARACTER THO THAHAN u'\u0e18' # 0xB8 -> THAI CHARACTER THO THONG u'\u0e19' # 0xB9 -> THAI CHARACTER NO NU u'\u0e1a' # 0xBA -> THAI CHARACTER BO BAIMAI u'\u0e1b' # 0xBB -> THAI CHARACTER PO PLA u'\u0e1c' # 0xBC -> THAI CHARACTER PHO PHUNG u'\u0e1d' # 0xBD -> THAI CHARACTER FO FA u'\u0e1e' # 0xBE -> THAI CHARACTER PHO PHAN u'\u0e1f' # 0xBF -> THAI CHARACTER FO FAN u'\u0e20' # 0xC0 -> THAI CHARACTER PHO SAMPHAO u'\u0e21' # 0xC1 -> THAI CHARACTER MO MA u'\u0e22' # 0xC2 -> THAI CHARACTER YO YAK u'\u0e23' # 0xC3 -> THAI CHARACTER RO RUA u'\u0e24' # 0xC4 -> THAI CHARACTER RU u'\u0e25' # 0xC5 -> THAI CHARACTER LO LING u'\u0e26' # 0xC6 -> THAI CHARACTER LU u'\u0e27' # 0xC7 -> THAI CHARACTER WO WAEN u'\u0e28' # 0xC8 -> THAI CHARACTER SO SALA u'\u0e29' # 0xC9 -> THAI CHARACTER SO RUSI u'\u0e2a' # 0xCA -> THAI CHARACTER SO SUA u'\u0e2b' # 0xCB -> THAI CHARACTER HO HIP u'\u0e2c' # 0xCC -> THAI CHARACTER LO CHULA u'\u0e2d' # 0xCD -> THAI CHARACTER O ANG u'\u0e2e' # 0xCE -> THAI CHARACTER HO NOKHUK u'\u0e2f' # 0xCF -> THAI CHARACTER PAIYANNOI u'\u0e30' # 0xD0 -> THAI CHARACTER SARA A u'\u0e31' # 0xD1 -> THAI CHARACTER MAI HAN-AKAT u'\u0e32' # 0xD2 -> THAI CHARACTER SARA AA u'\u0e33' # 0xD3 -> THAI CHARACTER SARA AM u'\u0e34' # 0xD4 -> THAI CHARACTER SARA I u'\u0e35' # 0xD5 -> THAI CHARACTER SARA II u'\u0e36' # 0xD6 -> THAI CHARACTER SARA UE u'\u0e37' # 0xD7 -> THAI CHARACTER SARA UEE u'\u0e38' # 0xD8 -> THAI CHARACTER SARA U u'\u0e39' # 0xD9 -> THAI CHARACTER SARA UU u'\u0e3a' # 0xDA -> THAI CHARACTER PHINTHU u'\ufffe' u'\ufffe' u'\ufffe' u'\ufffe' u'\u0e3f' # 0xDF -> THAI CURRENCY SYMBOL BAHT u'\u0e40' # 0xE0 -> THAI CHARACTER SARA E u'\u0e41' # 0xE1 -> THAI CHARACTER SARA AE u'\u0e42' # 0xE2 -> THAI CHARACTER SARA O u'\u0e43' # 0xE3 -> THAI CHARACTER SARA AI MAIMUAN u'\u0e44' # 0xE4 -> THAI CHARACTER SARA AI MAIMALAI u'\u0e45' # 0xE5 -> THAI CHARACTER LAKKHANGYAO u'\u0e46' # 0xE6 -> THAI CHARACTER MAIYAMOK u'\u0e47' # 0xE7 -> THAI CHARACTER MAITAIKHU u'\u0e48' # 0xE8 -> THAI CHARACTER MAI EK u'\u0e49' # 0xE9 -> THAI CHARACTER MAI THO u'\u0e4a' # 0xEA -> THAI CHARACTER MAI TRI u'\u0e4b' # 0xEB -> THAI CHARACTER MAI CHATTAWA u'\u0e4c' # 0xEC -> THAI CHARACTER THANTHAKHAT u'\u0e4d' # 0xED -> THAI CHARACTER NIKHAHIT u'\u0e4e' # 0xEE -> THAI CHARACTER YAMAKKAN u'\u0e4f' # 0xEF -> THAI CHARACTER FONGMAN u'\u0e50' # 0xF0 -> THAI DIGIT ZERO u'\u0e51' # 0xF1 -> THAI DIGIT ONE u'\u0e52' # 0xF2 -> THAI DIGIT TWO u'\u0e53' # 0xF3 -> THAI DIGIT THREE u'\u0e54' # 0xF4 -> THAI DIGIT FOUR u'\u0e55' # 0xF5 -> THAI DIGIT FIVE u'\u0e56' # 0xF6 -> THAI DIGIT SIX u'\u0e57' # 0xF7 -> THAI DIGIT SEVEN u'\u0e58' # 0xF8 -> THAI DIGIT EIGHT u'\u0e59' # 0xF9 -> THAI DIGIT NINE u'\u0e5a' # 0xFA -> THAI CHARACTER ANGKHANKHU u'\u0e5b' # 0xFB -> THAI CHARACTER KHOMUT u'\ufffe' u'\ufffe' u'\ufffe' u'\ufffe' ) ### Encoding table encoding_table=codecs.charmap_build(decoding_table) mac_romanian.py 0000644 00000033135 15053241622 0007547 0 ustar 00 """ Python Character Mapping Codec mac_romanian generated from 'MAPPINGS/VENDORS/APPLE/ROMANIAN.TXT' with gencodec.py. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.charmap_decode(input,self.errors,decoding_table)[0] class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='mac-romanian', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) ### Decoding Table decoding_table = ( u'\x00' # 0x00 -> CONTROL CHARACTER u'\x01' # 0x01 -> CONTROL CHARACTER u'\x02' # 0x02 -> CONTROL CHARACTER u'\x03' # 0x03 -> CONTROL CHARACTER u'\x04' # 0x04 -> CONTROL CHARACTER u'\x05' # 0x05 -> CONTROL CHARACTER u'\x06' # 0x06 -> CONTROL CHARACTER u'\x07' # 0x07 -> CONTROL CHARACTER u'\x08' # 0x08 -> CONTROL CHARACTER u'\t' # 0x09 -> CONTROL CHARACTER u'\n' # 0x0A -> CONTROL CHARACTER u'\x0b' # 0x0B -> CONTROL CHARACTER u'\x0c' # 0x0C -> CONTROL CHARACTER u'\r' # 0x0D -> CONTROL CHARACTER u'\x0e' # 0x0E -> CONTROL CHARACTER u'\x0f' # 0x0F -> CONTROL CHARACTER u'\x10' # 0x10 -> CONTROL CHARACTER u'\x11' # 0x11 -> CONTROL CHARACTER u'\x12' # 0x12 -> CONTROL CHARACTER u'\x13' # 0x13 -> CONTROL CHARACTER u'\x14' # 0x14 -> CONTROL CHARACTER u'\x15' # 0x15 -> CONTROL CHARACTER u'\x16' # 0x16 -> CONTROL CHARACTER u'\x17' # 0x17 -> CONTROL CHARACTER u'\x18' # 0x18 -> CONTROL CHARACTER u'\x19' # 0x19 -> CONTROL CHARACTER u'\x1a' # 0x1A -> CONTROL CHARACTER u'\x1b' # 0x1B -> CONTROL CHARACTER u'\x1c' # 0x1C -> CONTROL CHARACTER u'\x1d' # 0x1D -> CONTROL CHARACTER u'\x1e' # 0x1E -> CONTROL CHARACTER u'\x1f' # 0x1F -> CONTROL CHARACTER u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK u'#' # 0x23 -> NUMBER SIGN u'$' # 0x24 -> DOLLAR SIGN u'%' # 0x25 -> PERCENT SIGN u'&' # 0x26 -> AMPERSAND u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS u'*' # 0x2A -> ASTERISK u'+' # 0x2B -> PLUS SIGN u',' # 0x2C -> COMMA u'-' # 0x2D -> HYPHEN-MINUS u'.' # 0x2E -> FULL STOP u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO u'3' # 0x33 -> DIGIT THREE u'4' # 0x34 -> DIGIT FOUR u'5' # 0x35 -> DIGIT FIVE u'6' # 0x36 -> DIGIT SIX u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE u':' # 0x3A -> COLON u';' # 0x3B -> SEMICOLON u'<' # 0x3C -> LESS-THAN SIGN u'=' # 0x3D -> EQUALS SIGN u'>' # 0x3E -> GREATER-THAN SIGN u'?' # 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B u'C' # 0x43 -> LATIN CAPITAL LETTER C u'D' # 0x44 -> LATIN CAPITAL LETTER D u'E' # 0x45 -> LATIN CAPITAL LETTER E u'F' # 0x46 -> LATIN CAPITAL LETTER F u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I u'J' # 0x4A -> LATIN CAPITAL LETTER J u'K' # 0x4B -> LATIN CAPITAL LETTER K u'L' # 0x4C -> LATIN CAPITAL LETTER L u'M' # 0x4D -> LATIN CAPITAL LETTER M u'N' # 0x4E -> LATIN CAPITAL LETTER N u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R u'S' # 0x53 -> LATIN CAPITAL LETTER S u'T' # 0x54 -> LATIN CAPITAL LETTER T u'U' # 0x55 -> LATIN CAPITAL LETTER U u'V' # 0x56 -> LATIN CAPITAL LETTER V u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y u'Z' # 0x5A -> LATIN CAPITAL LETTER Z u'[' # 0x5B -> LEFT SQUARE BRACKET u'\\' # 0x5C -> REVERSE SOLIDUS u']' # 0x5D -> RIGHT SQUARE BRACKET u'^' # 0x5E -> CIRCUMFLEX ACCENT u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B u'c' # 0x63 -> LATIN SMALL LETTER C u'd' # 0x64 -> LATIN SMALL LETTER D u'e' # 0x65 -> LATIN SMALL LETTER E u'f' # 0x66 -> LATIN SMALL LETTER F u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I u'j' # 0x6A -> LATIN SMALL LETTER J u'k' # 0x6B -> LATIN SMALL LETTER K u'l' # 0x6C -> LATIN SMALL LETTER L u'm' # 0x6D -> LATIN SMALL LETTER M u'n' # 0x6E -> LATIN SMALL LETTER N u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R u's' # 0x73 -> LATIN SMALL LETTER S u't' # 0x74 -> LATIN SMALL LETTER T u'u' # 0x75 -> LATIN SMALL LETTER U u'v' # 0x76 -> LATIN SMALL LETTER V u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y u'z' # 0x7A -> LATIN SMALL LETTER Z u'{' # 0x7B -> LEFT CURLY BRACKET u'|' # 0x7C -> VERTICAL LINE u'}' # 0x7D -> RIGHT CURLY BRACKET u'~' # 0x7E -> TILDE u'\x7f' # 0x7F -> CONTROL CHARACTER u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS u'\xc5' # 0x81 -> LATIN CAPITAL LETTER A WITH RING ABOVE u'\xc7' # 0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE u'\xd1' # 0x84 -> LATIN CAPITAL LETTER N WITH TILDE u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS u'\xe3' # 0x8B -> LATIN SMALL LETTER A WITH TILDE u'\xe5' # 0x8C -> LATIN SMALL LETTER A WITH RING ABOVE u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE u'\xec' # 0x93 -> LATIN SMALL LETTER I WITH GRAVE u'\xee' # 0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX u'\xef' # 0x95 -> LATIN SMALL LETTER I WITH DIAERESIS u'\xf1' # 0x96 -> LATIN SMALL LETTER N WITH TILDE u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE u'\xf2' # 0x98 -> LATIN SMALL LETTER O WITH GRAVE u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS u'\xf5' # 0x9B -> LATIN SMALL LETTER O WITH TILDE u'\xfa' # 0x9C -> LATIN SMALL LETTER U WITH ACUTE u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS u'\u2020' # 0xA0 -> DAGGER u'\xb0' # 0xA1 -> DEGREE SIGN u'\xa2' # 0xA2 -> CENT SIGN u'\xa3' # 0xA3 -> POUND SIGN u'\xa7' # 0xA4 -> SECTION SIGN u'\u2022' # 0xA5 -> BULLET u'\xb6' # 0xA6 -> PILCROW SIGN u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S u'\xae' # 0xA8 -> REGISTERED SIGN u'\xa9' # 0xA9 -> COPYRIGHT SIGN u'\u2122' # 0xAA -> TRADE MARK SIGN u'\xb4' # 0xAB -> ACUTE ACCENT u'\xa8' # 0xAC -> DIAERESIS u'\u2260' # 0xAD -> NOT EQUAL TO u'\u0102' # 0xAE -> LATIN CAPITAL LETTER A WITH BREVE u'\u0218' # 0xAF -> LATIN CAPITAL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later u'\u221e' # 0xB0 -> INFINITY u'\xb1' # 0xB1 -> PLUS-MINUS SIGN u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO u'\xa5' # 0xB4 -> YEN SIGN u'\xb5' # 0xB5 -> MICRO SIGN u'\u2202' # 0xB6 -> PARTIAL DIFFERENTIAL u'\u2211' # 0xB7 -> N-ARY SUMMATION u'\u220f' # 0xB8 -> N-ARY PRODUCT u'\u03c0' # 0xB9 -> GREEK SMALL LETTER PI u'\u222b' # 0xBA -> INTEGRAL u'\xaa' # 0xBB -> FEMININE ORDINAL INDICATOR u'\xba' # 0xBC -> MASCULINE ORDINAL INDICATOR u'\u03a9' # 0xBD -> GREEK CAPITAL LETTER OMEGA u'\u0103' # 0xBE -> LATIN SMALL LETTER A WITH BREVE u'\u0219' # 0xBF -> LATIN SMALL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later u'\xbf' # 0xC0 -> INVERTED QUESTION MARK u'\xa1' # 0xC1 -> INVERTED EXCLAMATION MARK u'\xac' # 0xC2 -> NOT SIGN u'\u221a' # 0xC3 -> SQUARE ROOT u'\u0192' # 0xC4 -> LATIN SMALL LETTER F WITH HOOK u'\u2248' # 0xC5 -> ALMOST EQUAL TO u'\u2206' # 0xC6 -> INCREMENT u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS u'\xa0' # 0xCA -> NO-BREAK SPACE u'\xc0' # 0xCB -> LATIN CAPITAL LETTER A WITH GRAVE u'\xc3' # 0xCC -> LATIN CAPITAL LETTER A WITH TILDE u'\xd5' # 0xCD -> LATIN CAPITAL LETTER O WITH TILDE u'\u0152' # 0xCE -> LATIN CAPITAL LIGATURE OE u'\u0153' # 0xCF -> LATIN SMALL LIGATURE OE u'\u2013' # 0xD0 -> EN DASH u'\u2014' # 0xD1 -> EM DASH u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK u'\xf7' # 0xD6 -> DIVISION SIGN u'\u25ca' # 0xD7 -> LOZENGE u'\xff' # 0xD8 -> LATIN SMALL LETTER Y WITH DIAERESIS u'\u0178' # 0xD9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS u'\u2044' # 0xDA -> FRACTION SLASH u'\u20ac' # 0xDB -> EURO SIGN u'\u2039' # 0xDC -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK u'\u203a' # 0xDD -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK u'\u021a' # 0xDE -> LATIN CAPITAL LETTER T WITH COMMA BELOW # for Unicode 3.0 and later u'\u021b' # 0xDF -> LATIN SMALL LETTER T WITH COMMA BELOW # for Unicode 3.0 and later u'\u2021' # 0xE0 -> DOUBLE DAGGER u'\xb7' # 0xE1 -> MIDDLE DOT u'\u201a' # 0xE2 -> SINGLE LOW-9 QUOTATION MARK u'\u201e' # 0xE3 -> DOUBLE LOW-9 QUOTATION MARK u'\u2030' # 0xE4 -> PER MILLE SIGN u'\xc2' # 0xE5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX u'\xca' # 0xE6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX u'\xc1' # 0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE u'\xcb' # 0xE8 -> LATIN CAPITAL LETTER E WITH DIAERESIS u'\xc8' # 0xE9 -> LATIN CAPITAL LETTER E WITH GRAVE u'\xcd' # 0xEA -> LATIN CAPITAL LETTER I WITH ACUTE u'\xce' # 0xEB -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX u'\xcf' # 0xEC -> LATIN CAPITAL LETTER I WITH DIAERESIS u'\xcc' # 0xED -> LATIN CAPITAL LETTER I WITH GRAVE u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE u'\xd4' # 0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX u'\uf8ff' # 0xF0 -> Apple logo u'\xd2' # 0xF1 -> LATIN CAPITAL LETTER O WITH GRAVE u'\xda' # 0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE u'\xdb' # 0xF3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX u'\xd9' # 0xF4 -> LATIN CAPITAL LETTER U WITH GRAVE u'\u0131' # 0xF5 -> LATIN SMALL LETTER DOTLESS I u'\u02c6' # 0xF6 -> MODIFIER LETTER CIRCUMFLEX ACCENT u'\u02dc' # 0xF7 -> SMALL TILDE u'\xaf' # 0xF8 -> MACRON u'\u02d8' # 0xF9 -> BREVE u'\u02d9' # 0xFA -> DOT ABOVE u'\u02da' # 0xFB -> RING ABOVE u'\xb8' # 0xFC -> CEDILLA u'\u02dd' # 0xFD -> DOUBLE ACUTE ACCENT u'\u02db' # 0xFE -> OGONEK u'\u02c7' # 0xFF -> CARON ) ### Encoding table encoding_table=codecs.charmap_build(decoding_table) cp500.py 0000644 00000032101 15053241622 0005742 0 ustar 00 """ Python Character Mapping Codec cp500 generated from 'MAPPINGS/VENDORS/MICSFT/EBCDIC/CP500.TXT' with gencodec.py. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.charmap_decode(input,self.errors,decoding_table)[0] class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='cp500', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) ### Decoding Table decoding_table = ( u'\x00' # 0x00 -> NULL u'\x01' # 0x01 -> START OF HEADING u'\x02' # 0x02 -> START OF TEXT u'\x03' # 0x03 -> END OF TEXT u'\x9c' # 0x04 -> CONTROL u'\t' # 0x05 -> HORIZONTAL TABULATION u'\x86' # 0x06 -> CONTROL u'\x7f' # 0x07 -> DELETE u'\x97' # 0x08 -> CONTROL u'\x8d' # 0x09 -> CONTROL u'\x8e' # 0x0A -> CONTROL u'\x0b' # 0x0B -> VERTICAL TABULATION u'\x0c' # 0x0C -> FORM FEED u'\r' # 0x0D -> CARRIAGE RETURN u'\x0e' # 0x0E -> SHIFT OUT u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO u'\x13' # 0x13 -> DEVICE CONTROL THREE u'\x9d' # 0x14 -> CONTROL u'\x85' # 0x15 -> CONTROL u'\x08' # 0x16 -> BACKSPACE u'\x87' # 0x17 -> CONTROL u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM u'\x92' # 0x1A -> CONTROL u'\x8f' # 0x1B -> CONTROL u'\x1c' # 0x1C -> FILE SEPARATOR u'\x1d' # 0x1D -> GROUP SEPARATOR u'\x1e' # 0x1E -> RECORD SEPARATOR u'\x1f' # 0x1F -> UNIT SEPARATOR u'\x80' # 0x20 -> CONTROL u'\x81' # 0x21 -> CONTROL u'\x82' # 0x22 -> CONTROL u'\x83' # 0x23 -> CONTROL u'\x84' # 0x24 -> CONTROL u'\n' # 0x25 -> LINE FEED u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK u'\x1b' # 0x27 -> ESCAPE u'\x88' # 0x28 -> CONTROL u'\x89' # 0x29 -> CONTROL u'\x8a' # 0x2A -> CONTROL u'\x8b' # 0x2B -> CONTROL u'\x8c' # 0x2C -> CONTROL u'\x05' # 0x2D -> ENQUIRY u'\x06' # 0x2E -> ACKNOWLEDGE u'\x07' # 0x2F -> BELL u'\x90' # 0x30 -> CONTROL u'\x91' # 0x31 -> CONTROL u'\x16' # 0x32 -> SYNCHRONOUS IDLE u'\x93' # 0x33 -> CONTROL u'\x94' # 0x34 -> CONTROL u'\x95' # 0x35 -> CONTROL u'\x96' # 0x36 -> CONTROL u'\x04' # 0x37 -> END OF TRANSMISSION u'\x98' # 0x38 -> CONTROL u'\x99' # 0x39 -> CONTROL u'\x9a' # 0x3A -> CONTROL u'\x9b' # 0x3B -> CONTROL u'\x14' # 0x3C -> DEVICE CONTROL FOUR u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE u'\x9e' # 0x3E -> CONTROL u'\x1a' # 0x3F -> SUBSTITUTE u' ' # 0x40 -> SPACE u'\xa0' # 0x41 -> NO-BREAK SPACE u'\xe2' # 0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX u'\xe4' # 0x43 -> LATIN SMALL LETTER A WITH DIAERESIS u'\xe0' # 0x44 -> LATIN SMALL LETTER A WITH GRAVE u'\xe1' # 0x45 -> LATIN SMALL LETTER A WITH ACUTE u'\xe3' # 0x46 -> LATIN SMALL LETTER A WITH TILDE u'\xe5' # 0x47 -> LATIN SMALL LETTER A WITH RING ABOVE u'\xe7' # 0x48 -> LATIN SMALL LETTER C WITH CEDILLA u'\xf1' # 0x49 -> LATIN SMALL LETTER N WITH TILDE u'[' # 0x4A -> LEFT SQUARE BRACKET u'.' # 0x4B -> FULL STOP u'<' # 0x4C -> LESS-THAN SIGN u'(' # 0x4D -> LEFT PARENTHESIS u'+' # 0x4E -> PLUS SIGN u'!' # 0x4F -> EXCLAMATION MARK u'&' # 0x50 -> AMPERSAND u'\xe9' # 0x51 -> LATIN SMALL LETTER E WITH ACUTE u'\xea' # 0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX u'\xeb' # 0x53 -> LATIN SMALL LETTER E WITH DIAERESIS u'\xe8' # 0x54 -> LATIN SMALL LETTER E WITH GRAVE u'\xed' # 0x55 -> LATIN SMALL LETTER I WITH ACUTE u'\xee' # 0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX u'\xef' # 0x57 -> LATIN SMALL LETTER I WITH DIAERESIS u'\xec' # 0x58 -> LATIN SMALL LETTER I WITH GRAVE u'\xdf' # 0x59 -> LATIN SMALL LETTER SHARP S (GERMAN) u']' # 0x5A -> RIGHT SQUARE BRACKET u'$' # 0x5B -> DOLLAR SIGN u'*' # 0x5C -> ASTERISK u')' # 0x5D -> RIGHT PARENTHESIS u';' # 0x5E -> SEMICOLON u'^' # 0x5F -> CIRCUMFLEX ACCENT u'-' # 0x60 -> HYPHEN-MINUS u'/' # 0x61 -> SOLIDUS u'\xc2' # 0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX u'\xc4' # 0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS u'\xc0' # 0x64 -> LATIN CAPITAL LETTER A WITH GRAVE u'\xc1' # 0x65 -> LATIN CAPITAL LETTER A WITH ACUTE u'\xc3' # 0x66 -> LATIN CAPITAL LETTER A WITH TILDE u'\xc5' # 0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE u'\xc7' # 0x68 -> LATIN CAPITAL LETTER C WITH CEDILLA u'\xd1' # 0x69 -> LATIN CAPITAL LETTER N WITH TILDE u'\xa6' # 0x6A -> BROKEN BAR u',' # 0x6B -> COMMA u'%' # 0x6C -> PERCENT SIGN u'_' # 0x6D -> LOW LINE u'>' # 0x6E -> GREATER-THAN SIGN u'?' # 0x6F -> QUESTION MARK u'\xf8' # 0x70 -> LATIN SMALL LETTER O WITH STROKE u'\xc9' # 0x71 -> LATIN CAPITAL LETTER E WITH ACUTE u'\xca' # 0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX u'\xcb' # 0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS u'\xc8' # 0x74 -> LATIN CAPITAL LETTER E WITH GRAVE u'\xcd' # 0x75 -> LATIN CAPITAL LETTER I WITH ACUTE u'\xce' # 0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX u'\xcf' # 0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS u'\xcc' # 0x78 -> LATIN CAPITAL LETTER I WITH GRAVE u'`' # 0x79 -> GRAVE ACCENT u':' # 0x7A -> COLON u'#' # 0x7B -> NUMBER SIGN u'@' # 0x7C -> COMMERCIAL AT u"'" # 0x7D -> APOSTROPHE u'=' # 0x7E -> EQUALS SIGN u'"' # 0x7F -> QUOTATION MARK u'\xd8' # 0x80 -> LATIN CAPITAL LETTER O WITH STROKE u'a' # 0x81 -> LATIN SMALL LETTER A u'b' # 0x82 -> LATIN SMALL LETTER B u'c' # 0x83 -> LATIN SMALL LETTER C u'd' # 0x84 -> LATIN SMALL LETTER D u'e' # 0x85 -> LATIN SMALL LETTER E u'f' # 0x86 -> LATIN SMALL LETTER F u'g' # 0x87 -> LATIN SMALL LETTER G u'h' # 0x88 -> LATIN SMALL LETTER H u'i' # 0x89 -> LATIN SMALL LETTER I u'\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK u'\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK u'\xf0' # 0x8C -> LATIN SMALL LETTER ETH (ICELANDIC) u'\xfd' # 0x8D -> LATIN SMALL LETTER Y WITH ACUTE u'\xfe' # 0x8E -> LATIN SMALL LETTER THORN (ICELANDIC) u'\xb1' # 0x8F -> PLUS-MINUS SIGN u'\xb0' # 0x90 -> DEGREE SIGN u'j' # 0x91 -> LATIN SMALL LETTER J u'k' # 0x92 -> LATIN SMALL LETTER K u'l' # 0x93 -> LATIN SMALL LETTER L u'm' # 0x94 -> LATIN SMALL LETTER M u'n' # 0x95 -> LATIN SMALL LETTER N u'o' # 0x96 -> LATIN SMALL LETTER O u'p' # 0x97 -> LATIN SMALL LETTER P u'q' # 0x98 -> LATIN SMALL LETTER Q u'r' # 0x99 -> LATIN SMALL LETTER R u'\xaa' # 0x9A -> FEMININE ORDINAL INDICATOR u'\xba' # 0x9B -> MASCULINE ORDINAL INDICATOR u'\xe6' # 0x9C -> LATIN SMALL LIGATURE AE u'\xb8' # 0x9D -> CEDILLA u'\xc6' # 0x9E -> LATIN CAPITAL LIGATURE AE u'\xa4' # 0x9F -> CURRENCY SIGN u'\xb5' # 0xA0 -> MICRO SIGN u'~' # 0xA1 -> TILDE u's' # 0xA2 -> LATIN SMALL LETTER S u't' # 0xA3 -> LATIN SMALL LETTER T u'u' # 0xA4 -> LATIN SMALL LETTER U u'v' # 0xA5 -> LATIN SMALL LETTER V u'w' # 0xA6 -> LATIN SMALL LETTER W u'x' # 0xA7 -> LATIN SMALL LETTER X u'y' # 0xA8 -> LATIN SMALL LETTER Y u'z' # 0xA9 -> LATIN SMALL LETTER Z u'\xa1' # 0xAA -> INVERTED EXCLAMATION MARK u'\xbf' # 0xAB -> INVERTED QUESTION MARK u'\xd0' # 0xAC -> LATIN CAPITAL LETTER ETH (ICELANDIC) u'\xdd' # 0xAD -> LATIN CAPITAL LETTER Y WITH ACUTE u'\xde' # 0xAE -> LATIN CAPITAL LETTER THORN (ICELANDIC) u'\xae' # 0xAF -> REGISTERED SIGN u'\xa2' # 0xB0 -> CENT SIGN u'\xa3' # 0xB1 -> POUND SIGN u'\xa5' # 0xB2 -> YEN SIGN u'\xb7' # 0xB3 -> MIDDLE DOT u'\xa9' # 0xB4 -> COPYRIGHT SIGN u'\xa7' # 0xB5 -> SECTION SIGN u'\xb6' # 0xB6 -> PILCROW SIGN u'\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER u'\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF u'\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS u'\xac' # 0xBA -> NOT SIGN u'|' # 0xBB -> VERTICAL LINE u'\xaf' # 0xBC -> MACRON u'\xa8' # 0xBD -> DIAERESIS u'\xb4' # 0xBE -> ACUTE ACCENT u'\xd7' # 0xBF -> MULTIPLICATION SIGN u'{' # 0xC0 -> LEFT CURLY BRACKET u'A' # 0xC1 -> LATIN CAPITAL LETTER A u'B' # 0xC2 -> LATIN CAPITAL LETTER B u'C' # 0xC3 -> LATIN CAPITAL LETTER C u'D' # 0xC4 -> LATIN CAPITAL LETTER D u'E' # 0xC5 -> LATIN CAPITAL LETTER E u'F' # 0xC6 -> LATIN CAPITAL LETTER F u'G' # 0xC7 -> LATIN CAPITAL LETTER G u'H' # 0xC8 -> LATIN CAPITAL LETTER H u'I' # 0xC9 -> LATIN CAPITAL LETTER I u'\xad' # 0xCA -> SOFT HYPHEN u'\xf4' # 0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX u'\xf6' # 0xCC -> LATIN SMALL LETTER O WITH DIAERESIS u'\xf2' # 0xCD -> LATIN SMALL LETTER O WITH GRAVE u'\xf3' # 0xCE -> LATIN SMALL LETTER O WITH ACUTE u'\xf5' # 0xCF -> LATIN SMALL LETTER O WITH TILDE u'}' # 0xD0 -> RIGHT CURLY BRACKET u'J' # 0xD1 -> LATIN CAPITAL LETTER J u'K' # 0xD2 -> LATIN CAPITAL LETTER K u'L' # 0xD3 -> LATIN CAPITAL LETTER L u'M' # 0xD4 -> LATIN CAPITAL LETTER M u'N' # 0xD5 -> LATIN CAPITAL LETTER N u'O' # 0xD6 -> LATIN CAPITAL LETTER O u'P' # 0xD7 -> LATIN CAPITAL LETTER P u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q u'R' # 0xD9 -> LATIN CAPITAL LETTER R u'\xb9' # 0xDA -> SUPERSCRIPT ONE u'\xfb' # 0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX u'\xfc' # 0xDC -> LATIN SMALL LETTER U WITH DIAERESIS u'\xf9' # 0xDD -> LATIN SMALL LETTER U WITH GRAVE u'\xfa' # 0xDE -> LATIN SMALL LETTER U WITH ACUTE u'\xff' # 0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS u'\\' # 0xE0 -> REVERSE SOLIDUS u'\xf7' # 0xE1 -> DIVISION SIGN u'S' # 0xE2 -> LATIN CAPITAL LETTER S u'T' # 0xE3 -> LATIN CAPITAL LETTER T u'U' # 0xE4 -> LATIN CAPITAL LETTER U u'V' # 0xE5 -> LATIN CAPITAL LETTER V u'W' # 0xE6 -> LATIN CAPITAL LETTER W u'X' # 0xE7 -> LATIN CAPITAL LETTER X u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z u'\xb2' # 0xEA -> SUPERSCRIPT TWO u'\xd4' # 0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX u'\xd6' # 0xEC -> LATIN CAPITAL LETTER O WITH DIAERESIS u'\xd2' # 0xED -> LATIN CAPITAL LETTER O WITH GRAVE u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE u'\xd5' # 0xEF -> LATIN CAPITAL LETTER O WITH TILDE u'0' # 0xF0 -> DIGIT ZERO u'1' # 0xF1 -> DIGIT ONE u'2' # 0xF2 -> DIGIT TWO u'3' # 0xF3 -> DIGIT THREE u'4' # 0xF4 -> DIGIT FOUR u'5' # 0xF5 -> DIGIT FIVE u'6' # 0xF6 -> DIGIT SIX u'7' # 0xF7 -> DIGIT SEVEN u'8' # 0xF8 -> DIGIT EIGHT u'9' # 0xF9 -> DIGIT NINE u'\xb3' # 0xFA -> SUPERSCRIPT THREE u'\xdb' # 0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX u'\xdc' # 0xFC -> LATIN CAPITAL LETTER U WITH DIAERESIS u'\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE u'\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE u'\x9f' # 0xFF -> CONTROL ) ### Encoding table encoding_table=codecs.charmap_build(decoding_table) euc_jisx0213.py 0000644 00000002033 15053241622 0007233 0 ustar 00 # # euc_jisx0213.py: Python Unicode Codec for EUC_JISX0213 # # Written by Hye-Shik Chang <perky@FreeBSD.org> # import _codecs_jp, codecs import _multibytecodec as mbc codec = _codecs_jp.getcodec('euc_jisx0213') class Codec(codecs.Codec): encode = codec.encode decode = codec.decode class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, codecs.IncrementalEncoder): codec = codec class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, codecs.IncrementalDecoder): codec = codec class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): codec = codec class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): codec = codec def getregentry(): return codecs.CodecInfo( name='euc_jisx0213', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) cp1256.py 0000644 00000031416 15053241622 0006043 0 ustar 00 """ Python Character Mapping Codec cp1256 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1256.TXT' with gencodec.py. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.charmap_decode(input,self.errors,decoding_table)[0] class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='cp1256', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) ### Decoding Table decoding_table = ( u'\x00' # 0x00 -> NULL u'\x01' # 0x01 -> START OF HEADING u'\x02' # 0x02 -> START OF TEXT u'\x03' # 0x03 -> END OF TEXT u'\x04' # 0x04 -> END OF TRANSMISSION u'\x05' # 0x05 -> ENQUIRY u'\x06' # 0x06 -> ACKNOWLEDGE u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION u'\n' # 0x0A -> LINE FEED u'\x0b' # 0x0B -> VERTICAL TABULATION u'\x0c' # 0x0C -> FORM FEED u'\r' # 0x0D -> CARRIAGE RETURN u'\x0e' # 0x0E -> SHIFT OUT u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO u'\x13' # 0x13 -> DEVICE CONTROL THREE u'\x14' # 0x14 -> DEVICE CONTROL FOUR u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE u'\x16' # 0x16 -> SYNCHRONOUS IDLE u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM u'\x1a' # 0x1A -> SUBSTITUTE u'\x1b' # 0x1B -> ESCAPE u'\x1c' # 0x1C -> FILE SEPARATOR u'\x1d' # 0x1D -> GROUP SEPARATOR u'\x1e' # 0x1E -> RECORD SEPARATOR u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK u'#' # 0x23 -> NUMBER SIGN u'$' # 0x24 -> DOLLAR SIGN u'%' # 0x25 -> PERCENT SIGN u'&' # 0x26 -> AMPERSAND u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS u'*' # 0x2A -> ASTERISK u'+' # 0x2B -> PLUS SIGN u',' # 0x2C -> COMMA u'-' # 0x2D -> HYPHEN-MINUS u'.' # 0x2E -> FULL STOP u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO u'3' # 0x33 -> DIGIT THREE u'4' # 0x34 -> DIGIT FOUR u'5' # 0x35 -> DIGIT FIVE u'6' # 0x36 -> DIGIT SIX u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE u':' # 0x3A -> COLON u';' # 0x3B -> SEMICOLON u'<' # 0x3C -> LESS-THAN SIGN u'=' # 0x3D -> EQUALS SIGN u'>' # 0x3E -> GREATER-THAN SIGN u'?' # 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B u'C' # 0x43 -> LATIN CAPITAL LETTER C u'D' # 0x44 -> LATIN CAPITAL LETTER D u'E' # 0x45 -> LATIN CAPITAL LETTER E u'F' # 0x46 -> LATIN CAPITAL LETTER F u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I u'J' # 0x4A -> LATIN CAPITAL LETTER J u'K' # 0x4B -> LATIN CAPITAL LETTER K u'L' # 0x4C -> LATIN CAPITAL LETTER L u'M' # 0x4D -> LATIN CAPITAL LETTER M u'N' # 0x4E -> LATIN CAPITAL LETTER N u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R u'S' # 0x53 -> LATIN CAPITAL LETTER S u'T' # 0x54 -> LATIN CAPITAL LETTER T u'U' # 0x55 -> LATIN CAPITAL LETTER U u'V' # 0x56 -> LATIN CAPITAL LETTER V u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y u'Z' # 0x5A -> LATIN CAPITAL LETTER Z u'[' # 0x5B -> LEFT SQUARE BRACKET u'\\' # 0x5C -> REVERSE SOLIDUS u']' # 0x5D -> RIGHT SQUARE BRACKET u'^' # 0x5E -> CIRCUMFLEX ACCENT u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B u'c' # 0x63 -> LATIN SMALL LETTER C u'd' # 0x64 -> LATIN SMALL LETTER D u'e' # 0x65 -> LATIN SMALL LETTER E u'f' # 0x66 -> LATIN SMALL LETTER F u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I u'j' # 0x6A -> LATIN SMALL LETTER J u'k' # 0x6B -> LATIN SMALL LETTER K u'l' # 0x6C -> LATIN SMALL LETTER L u'm' # 0x6D -> LATIN SMALL LETTER M u'n' # 0x6E -> LATIN SMALL LETTER N u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R u's' # 0x73 -> LATIN SMALL LETTER S u't' # 0x74 -> LATIN SMALL LETTER T u'u' # 0x75 -> LATIN SMALL LETTER U u'v' # 0x76 -> LATIN SMALL LETTER V u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y u'z' # 0x7A -> LATIN SMALL LETTER Z u'{' # 0x7B -> LEFT CURLY BRACKET u'|' # 0x7C -> VERTICAL LINE u'}' # 0x7D -> RIGHT CURLY BRACKET u'~' # 0x7E -> TILDE u'\x7f' # 0x7F -> DELETE u'\u20ac' # 0x80 -> EURO SIGN u'\u067e' # 0x81 -> ARABIC LETTER PEH u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS u'\u2020' # 0x86 -> DAGGER u'\u2021' # 0x87 -> DOUBLE DAGGER u'\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT u'\u2030' # 0x89 -> PER MILLE SIGN u'\u0679' # 0x8A -> ARABIC LETTER TTEH u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK u'\u0152' # 0x8C -> LATIN CAPITAL LIGATURE OE u'\u0686' # 0x8D -> ARABIC LETTER TCHEH u'\u0698' # 0x8E -> ARABIC LETTER JEH u'\u0688' # 0x8F -> ARABIC LETTER DDAL u'\u06af' # 0x90 -> ARABIC LETTER GAF u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK u'\u2022' # 0x95 -> BULLET u'\u2013' # 0x96 -> EN DASH u'\u2014' # 0x97 -> EM DASH u'\u06a9' # 0x98 -> ARABIC LETTER KEHEH u'\u2122' # 0x99 -> TRADE MARK SIGN u'\u0691' # 0x9A -> ARABIC LETTER RREH u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK u'\u0153' # 0x9C -> LATIN SMALL LIGATURE OE u'\u200c' # 0x9D -> ZERO WIDTH NON-JOINER u'\u200d' # 0x9E -> ZERO WIDTH JOINER u'\u06ba' # 0x9F -> ARABIC LETTER NOON GHUNNA u'\xa0' # 0xA0 -> NO-BREAK SPACE u'\u060c' # 0xA1 -> ARABIC COMMA u'\xa2' # 0xA2 -> CENT SIGN u'\xa3' # 0xA3 -> POUND SIGN u'\xa4' # 0xA4 -> CURRENCY SIGN u'\xa5' # 0xA5 -> YEN SIGN u'\xa6' # 0xA6 -> BROKEN BAR u'\xa7' # 0xA7 -> SECTION SIGN u'\xa8' # 0xA8 -> DIAERESIS u'\xa9' # 0xA9 -> COPYRIGHT SIGN u'\u06be' # 0xAA -> ARABIC LETTER HEH DOACHASHMEE u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK u'\xac' # 0xAC -> NOT SIGN u'\xad' # 0xAD -> SOFT HYPHEN u'\xae' # 0xAE -> REGISTERED SIGN u'\xaf' # 0xAF -> MACRON u'\xb0' # 0xB0 -> DEGREE SIGN u'\xb1' # 0xB1 -> PLUS-MINUS SIGN u'\xb2' # 0xB2 -> SUPERSCRIPT TWO u'\xb3' # 0xB3 -> SUPERSCRIPT THREE u'\xb4' # 0xB4 -> ACUTE ACCENT u'\xb5' # 0xB5 -> MICRO SIGN u'\xb6' # 0xB6 -> PILCROW SIGN u'\xb7' # 0xB7 -> MIDDLE DOT u'\xb8' # 0xB8 -> CEDILLA u'\xb9' # 0xB9 -> SUPERSCRIPT ONE u'\u061b' # 0xBA -> ARABIC SEMICOLON u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS u'\u061f' # 0xBF -> ARABIC QUESTION MARK u'\u06c1' # 0xC0 -> ARABIC LETTER HEH GOAL u'\u0621' # 0xC1 -> ARABIC LETTER HAMZA u'\u0622' # 0xC2 -> ARABIC LETTER ALEF WITH MADDA ABOVE u'\u0623' # 0xC3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE u'\u0624' # 0xC4 -> ARABIC LETTER WAW WITH HAMZA ABOVE u'\u0625' # 0xC5 -> ARABIC LETTER ALEF WITH HAMZA BELOW u'\u0626' # 0xC6 -> ARABIC LETTER YEH WITH HAMZA ABOVE u'\u0627' # 0xC7 -> ARABIC LETTER ALEF u'\u0628' # 0xC8 -> ARABIC LETTER BEH u'\u0629' # 0xC9 -> ARABIC LETTER TEH MARBUTA u'\u062a' # 0xCA -> ARABIC LETTER TEH u'\u062b' # 0xCB -> ARABIC LETTER THEH u'\u062c' # 0xCC -> ARABIC LETTER JEEM u'\u062d' # 0xCD -> ARABIC LETTER HAH u'\u062e' # 0xCE -> ARABIC LETTER KHAH u'\u062f' # 0xCF -> ARABIC LETTER DAL u'\u0630' # 0xD0 -> ARABIC LETTER THAL u'\u0631' # 0xD1 -> ARABIC LETTER REH u'\u0632' # 0xD2 -> ARABIC LETTER ZAIN u'\u0633' # 0xD3 -> ARABIC LETTER SEEN u'\u0634' # 0xD4 -> ARABIC LETTER SHEEN u'\u0635' # 0xD5 -> ARABIC LETTER SAD u'\u0636' # 0xD6 -> ARABIC LETTER DAD u'\xd7' # 0xD7 -> MULTIPLICATION SIGN u'\u0637' # 0xD8 -> ARABIC LETTER TAH u'\u0638' # 0xD9 -> ARABIC LETTER ZAH u'\u0639' # 0xDA -> ARABIC LETTER AIN u'\u063a' # 0xDB -> ARABIC LETTER GHAIN u'\u0640' # 0xDC -> ARABIC TATWEEL u'\u0641' # 0xDD -> ARABIC LETTER FEH u'\u0642' # 0xDE -> ARABIC LETTER QAF u'\u0643' # 0xDF -> ARABIC LETTER KAF u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE u'\u0644' # 0xE1 -> ARABIC LETTER LAM u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX u'\u0645' # 0xE3 -> ARABIC LETTER MEEM u'\u0646' # 0xE4 -> ARABIC LETTER NOON u'\u0647' # 0xE5 -> ARABIC LETTER HEH u'\u0648' # 0xE6 -> ARABIC LETTER WAW u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS u'\u0649' # 0xEC -> ARABIC LETTER ALEF MAKSURA u'\u064a' # 0xED -> ARABIC LETTER YEH u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS u'\u064b' # 0xF0 -> ARABIC FATHATAN u'\u064c' # 0xF1 -> ARABIC DAMMATAN u'\u064d' # 0xF2 -> ARABIC KASRATAN u'\u064e' # 0xF3 -> ARABIC FATHA u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX u'\u064f' # 0xF5 -> ARABIC DAMMA u'\u0650' # 0xF6 -> ARABIC KASRA u'\xf7' # 0xF7 -> DIVISION SIGN u'\u0651' # 0xF8 -> ARABIC SHADDA u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE u'\u0652' # 0xFA -> ARABIC SUKUN u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS u'\u200e' # 0xFD -> LEFT-TO-RIGHT MARK u'\u200f' # 0xFE -> RIGHT-TO-LEFT MARK u'\u06d2' # 0xFF -> ARABIC LETTER YEH BARREE ) ### Encoding table encoding_table=codecs.charmap_build(decoding_table) cp1251.py 0000644 00000032461 15053241622 0006037 0 ustar 00 """ Python Character Mapping Codec cp1251 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1251.TXT' with gencodec.py. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.charmap_decode(input,self.errors,decoding_table)[0] class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='cp1251', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) ### Decoding Table decoding_table = ( u'\x00' # 0x00 -> NULL u'\x01' # 0x01 -> START OF HEADING u'\x02' # 0x02 -> START OF TEXT u'\x03' # 0x03 -> END OF TEXT u'\x04' # 0x04 -> END OF TRANSMISSION u'\x05' # 0x05 -> ENQUIRY u'\x06' # 0x06 -> ACKNOWLEDGE u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION u'\n' # 0x0A -> LINE FEED u'\x0b' # 0x0B -> VERTICAL TABULATION u'\x0c' # 0x0C -> FORM FEED u'\r' # 0x0D -> CARRIAGE RETURN u'\x0e' # 0x0E -> SHIFT OUT u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO u'\x13' # 0x13 -> DEVICE CONTROL THREE u'\x14' # 0x14 -> DEVICE CONTROL FOUR u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE u'\x16' # 0x16 -> SYNCHRONOUS IDLE u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM u'\x1a' # 0x1A -> SUBSTITUTE u'\x1b' # 0x1B -> ESCAPE u'\x1c' # 0x1C -> FILE SEPARATOR u'\x1d' # 0x1D -> GROUP SEPARATOR u'\x1e' # 0x1E -> RECORD SEPARATOR u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK u'#' # 0x23 -> NUMBER SIGN u'$' # 0x24 -> DOLLAR SIGN u'%' # 0x25 -> PERCENT SIGN u'&' # 0x26 -> AMPERSAND u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS u'*' # 0x2A -> ASTERISK u'+' # 0x2B -> PLUS SIGN u',' # 0x2C -> COMMA u'-' # 0x2D -> HYPHEN-MINUS u'.' # 0x2E -> FULL STOP u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO u'3' # 0x33 -> DIGIT THREE u'4' # 0x34 -> DIGIT FOUR u'5' # 0x35 -> DIGIT FIVE u'6' # 0x36 -> DIGIT SIX u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE u':' # 0x3A -> COLON u';' # 0x3B -> SEMICOLON u'<' # 0x3C -> LESS-THAN SIGN u'=' # 0x3D -> EQUALS SIGN u'>' # 0x3E -> GREATER-THAN SIGN u'?' # 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B u'C' # 0x43 -> LATIN CAPITAL LETTER C u'D' # 0x44 -> LATIN CAPITAL LETTER D u'E' # 0x45 -> LATIN CAPITAL LETTER E u'F' # 0x46 -> LATIN CAPITAL LETTER F u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I u'J' # 0x4A -> LATIN CAPITAL LETTER J u'K' # 0x4B -> LATIN CAPITAL LETTER K u'L' # 0x4C -> LATIN CAPITAL LETTER L u'M' # 0x4D -> LATIN CAPITAL LETTER M u'N' # 0x4E -> LATIN CAPITAL LETTER N u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R u'S' # 0x53 -> LATIN CAPITAL LETTER S u'T' # 0x54 -> LATIN CAPITAL LETTER T u'U' # 0x55 -> LATIN CAPITAL LETTER U u'V' # 0x56 -> LATIN CAPITAL LETTER V u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y u'Z' # 0x5A -> LATIN CAPITAL LETTER Z u'[' # 0x5B -> LEFT SQUARE BRACKET u'\\' # 0x5C -> REVERSE SOLIDUS u']' # 0x5D -> RIGHT SQUARE BRACKET u'^' # 0x5E -> CIRCUMFLEX ACCENT u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B u'c' # 0x63 -> LATIN SMALL LETTER C u'd' # 0x64 -> LATIN SMALL LETTER D u'e' # 0x65 -> LATIN SMALL LETTER E u'f' # 0x66 -> LATIN SMALL LETTER F u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I u'j' # 0x6A -> LATIN SMALL LETTER J u'k' # 0x6B -> LATIN SMALL LETTER K u'l' # 0x6C -> LATIN SMALL LETTER L u'm' # 0x6D -> LATIN SMALL LETTER M u'n' # 0x6E -> LATIN SMALL LETTER N u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R u's' # 0x73 -> LATIN SMALL LETTER S u't' # 0x74 -> LATIN SMALL LETTER T u'u' # 0x75 -> LATIN SMALL LETTER U u'v' # 0x76 -> LATIN SMALL LETTER V u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y u'z' # 0x7A -> LATIN SMALL LETTER Z u'{' # 0x7B -> LEFT CURLY BRACKET u'|' # 0x7C -> VERTICAL LINE u'}' # 0x7D -> RIGHT CURLY BRACKET u'~' # 0x7E -> TILDE u'\x7f' # 0x7F -> DELETE u'\u0402' # 0x80 -> CYRILLIC CAPITAL LETTER DJE u'\u0403' # 0x81 -> CYRILLIC CAPITAL LETTER GJE u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK u'\u0453' # 0x83 -> CYRILLIC SMALL LETTER GJE u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS u'\u2020' # 0x86 -> DAGGER u'\u2021' # 0x87 -> DOUBLE DAGGER u'\u20ac' # 0x88 -> EURO SIGN u'\u2030' # 0x89 -> PER MILLE SIGN u'\u0409' # 0x8A -> CYRILLIC CAPITAL LETTER LJE u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK u'\u040a' # 0x8C -> CYRILLIC CAPITAL LETTER NJE u'\u040c' # 0x8D -> CYRILLIC CAPITAL LETTER KJE u'\u040b' # 0x8E -> CYRILLIC CAPITAL LETTER TSHE u'\u040f' # 0x8F -> CYRILLIC CAPITAL LETTER DZHE u'\u0452' # 0x90 -> CYRILLIC SMALL LETTER DJE u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK u'\u2022' # 0x95 -> BULLET u'\u2013' # 0x96 -> EN DASH u'\u2014' # 0x97 -> EM DASH u'\ufffe' # 0x98 -> UNDEFINED u'\u2122' # 0x99 -> TRADE MARK SIGN u'\u0459' # 0x9A -> CYRILLIC SMALL LETTER LJE u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK u'\u045a' # 0x9C -> CYRILLIC SMALL LETTER NJE u'\u045c' # 0x9D -> CYRILLIC SMALL LETTER KJE u'\u045b' # 0x9E -> CYRILLIC SMALL LETTER TSHE u'\u045f' # 0x9F -> CYRILLIC SMALL LETTER DZHE u'\xa0' # 0xA0 -> NO-BREAK SPACE u'\u040e' # 0xA1 -> CYRILLIC CAPITAL LETTER SHORT U u'\u045e' # 0xA2 -> CYRILLIC SMALL LETTER SHORT U u'\u0408' # 0xA3 -> CYRILLIC CAPITAL LETTER JE u'\xa4' # 0xA4 -> CURRENCY SIGN u'\u0490' # 0xA5 -> CYRILLIC CAPITAL LETTER GHE WITH UPTURN u'\xa6' # 0xA6 -> BROKEN BAR u'\xa7' # 0xA7 -> SECTION SIGN u'\u0401' # 0xA8 -> CYRILLIC CAPITAL LETTER IO u'\xa9' # 0xA9 -> COPYRIGHT SIGN u'\u0404' # 0xAA -> CYRILLIC CAPITAL LETTER UKRAINIAN IE u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK u'\xac' # 0xAC -> NOT SIGN u'\xad' # 0xAD -> SOFT HYPHEN u'\xae' # 0xAE -> REGISTERED SIGN u'\u0407' # 0xAF -> CYRILLIC CAPITAL LETTER YI u'\xb0' # 0xB0 -> DEGREE SIGN u'\xb1' # 0xB1 -> PLUS-MINUS SIGN u'\u0406' # 0xB2 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I u'\u0456' # 0xB3 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I u'\u0491' # 0xB4 -> CYRILLIC SMALL LETTER GHE WITH UPTURN u'\xb5' # 0xB5 -> MICRO SIGN u'\xb6' # 0xB6 -> PILCROW SIGN u'\xb7' # 0xB7 -> MIDDLE DOT u'\u0451' # 0xB8 -> CYRILLIC SMALL LETTER IO u'\u2116' # 0xB9 -> NUMERO SIGN u'\u0454' # 0xBA -> CYRILLIC SMALL LETTER UKRAINIAN IE u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK u'\u0458' # 0xBC -> CYRILLIC SMALL LETTER JE u'\u0405' # 0xBD -> CYRILLIC CAPITAL LETTER DZE u'\u0455' # 0xBE -> CYRILLIC SMALL LETTER DZE u'\u0457' # 0xBF -> CYRILLIC SMALL LETTER YI u'\u0410' # 0xC0 -> CYRILLIC CAPITAL LETTER A u'\u0411' # 0xC1 -> CYRILLIC CAPITAL LETTER BE u'\u0412' # 0xC2 -> CYRILLIC CAPITAL LETTER VE u'\u0413' # 0xC3 -> CYRILLIC CAPITAL LETTER GHE u'\u0414' # 0xC4 -> CYRILLIC CAPITAL LETTER DE u'\u0415' # 0xC5 -> CYRILLIC CAPITAL LETTER IE u'\u0416' # 0xC6 -> CYRILLIC CAPITAL LETTER ZHE u'\u0417' # 0xC7 -> CYRILLIC CAPITAL LETTER ZE u'\u0418' # 0xC8 -> CYRILLIC CAPITAL LETTER I u'\u0419' # 0xC9 -> CYRILLIC CAPITAL LETTER SHORT I u'\u041a' # 0xCA -> CYRILLIC CAPITAL LETTER KA u'\u041b' # 0xCB -> CYRILLIC CAPITAL LETTER EL u'\u041c' # 0xCC -> CYRILLIC CAPITAL LETTER EM u'\u041d' # 0xCD -> CYRILLIC CAPITAL LETTER EN u'\u041e' # 0xCE -> CYRILLIC CAPITAL LETTER O u'\u041f' # 0xCF -> CYRILLIC CAPITAL LETTER PE u'\u0420' # 0xD0 -> CYRILLIC CAPITAL LETTER ER u'\u0421' # 0xD1 -> CYRILLIC CAPITAL LETTER ES u'\u0422' # 0xD2 -> CYRILLIC CAPITAL LETTER TE u'\u0423' # 0xD3 -> CYRILLIC CAPITAL LETTER U u'\u0424' # 0xD4 -> CYRILLIC CAPITAL LETTER EF u'\u0425' # 0xD5 -> CYRILLIC CAPITAL LETTER HA u'\u0426' # 0xD6 -> CYRILLIC CAPITAL LETTER TSE u'\u0427' # 0xD7 -> CYRILLIC CAPITAL LETTER CHE u'\u0428' # 0xD8 -> CYRILLIC CAPITAL LETTER SHA u'\u0429' # 0xD9 -> CYRILLIC CAPITAL LETTER SHCHA u'\u042a' # 0xDA -> CYRILLIC CAPITAL LETTER HARD SIGN u'\u042b' # 0xDB -> CYRILLIC CAPITAL LETTER YERU u'\u042c' # 0xDC -> CYRILLIC CAPITAL LETTER SOFT SIGN u'\u042d' # 0xDD -> CYRILLIC CAPITAL LETTER E u'\u042e' # 0xDE -> CYRILLIC CAPITAL LETTER YU u'\u042f' # 0xDF -> CYRILLIC CAPITAL LETTER YA u'\u0430' # 0xE0 -> CYRILLIC SMALL LETTER A u'\u0431' # 0xE1 -> CYRILLIC SMALL LETTER BE u'\u0432' # 0xE2 -> CYRILLIC SMALL LETTER VE u'\u0433' # 0xE3 -> CYRILLIC SMALL LETTER GHE u'\u0434' # 0xE4 -> CYRILLIC SMALL LETTER DE u'\u0435' # 0xE5 -> CYRILLIC SMALL LETTER IE u'\u0436' # 0xE6 -> CYRILLIC SMALL LETTER ZHE u'\u0437' # 0xE7 -> CYRILLIC SMALL LETTER ZE u'\u0438' # 0xE8 -> CYRILLIC SMALL LETTER I u'\u0439' # 0xE9 -> CYRILLIC SMALL LETTER SHORT I u'\u043a' # 0xEA -> CYRILLIC SMALL LETTER KA u'\u043b' # 0xEB -> CYRILLIC SMALL LETTER EL u'\u043c' # 0xEC -> CYRILLIC SMALL LETTER EM u'\u043d' # 0xED -> CYRILLIC SMALL LETTER EN u'\u043e' # 0xEE -> CYRILLIC SMALL LETTER O u'\u043f' # 0xEF -> CYRILLIC SMALL LETTER PE u'\u0440' # 0xF0 -> CYRILLIC SMALL LETTER ER u'\u0441' # 0xF1 -> CYRILLIC SMALL LETTER ES u'\u0442' # 0xF2 -> CYRILLIC SMALL LETTER TE u'\u0443' # 0xF3 -> CYRILLIC SMALL LETTER U u'\u0444' # 0xF4 -> CYRILLIC SMALL LETTER EF u'\u0445' # 0xF5 -> CYRILLIC SMALL LETTER HA u'\u0446' # 0xF6 -> CYRILLIC SMALL LETTER TSE u'\u0447' # 0xF7 -> CYRILLIC SMALL LETTER CHE u'\u0448' # 0xF8 -> CYRILLIC SMALL LETTER SHA u'\u0449' # 0xF9 -> CYRILLIC SMALL LETTER SHCHA u'\u044a' # 0xFA -> CYRILLIC SMALL LETTER HARD SIGN u'\u044b' # 0xFB -> CYRILLIC SMALL LETTER YERU u'\u044c' # 0xFC -> CYRILLIC SMALL LETTER SOFT SIGN u'\u044d' # 0xFD -> CYRILLIC SMALL LETTER E u'\u044e' # 0xFE -> CYRILLIC SMALL LETTER YU u'\u044f' # 0xFF -> CYRILLIC SMALL LETTER YA ) ### Encoding table encoding_table=codecs.charmap_build(decoding_table) kz1048.py 0000644 00000032633 15053241622 0006066 0 ustar 00 """ Python Character Mapping Codec kz1048 generated from 'MAPPINGS/VENDORS/MISC/KZ1048.TXT' with gencodec.py. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self, input, errors='strict'): return codecs.charmap_encode(input, errors, encoding_table) def decode(self, input, errors='strict'): return codecs.charmap_decode(input, errors, decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.charmap_encode(input, self.errors, encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.charmap_decode(input, self.errors, decoding_table)[0] class StreamWriter(Codec, codecs.StreamWriter): pass class StreamReader(Codec, codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='kz1048', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) ### Decoding Table decoding_table = ( '\x00' # 0x00 -> NULL '\x01' # 0x01 -> START OF HEADING '\x02' # 0x02 -> START OF TEXT '\x03' # 0x03 -> END OF TEXT '\x04' # 0x04 -> END OF TRANSMISSION '\x05' # 0x05 -> ENQUIRY '\x06' # 0x06 -> ACKNOWLEDGE '\x07' # 0x07 -> BELL '\x08' # 0x08 -> BACKSPACE '\t' # 0x09 -> HORIZONTAL TABULATION '\n' # 0x0A -> LINE FEED '\x0b' # 0x0B -> VERTICAL TABULATION '\x0c' # 0x0C -> FORM FEED '\r' # 0x0D -> CARRIAGE RETURN '\x0e' # 0x0E -> SHIFT OUT '\x0f' # 0x0F -> SHIFT IN '\x10' # 0x10 -> DATA LINK ESCAPE '\x11' # 0x11 -> DEVICE CONTROL ONE '\x12' # 0x12 -> DEVICE CONTROL TWO '\x13' # 0x13 -> DEVICE CONTROL THREE '\x14' # 0x14 -> DEVICE CONTROL FOUR '\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE '\x16' # 0x16 -> SYNCHRONOUS IDLE '\x17' # 0x17 -> END OF TRANSMISSION BLOCK '\x18' # 0x18 -> CANCEL '\x19' # 0x19 -> END OF MEDIUM '\x1a' # 0x1A -> SUBSTITUTE '\x1b' # 0x1B -> ESCAPE '\x1c' # 0x1C -> FILE SEPARATOR '\x1d' # 0x1D -> GROUP SEPARATOR '\x1e' # 0x1E -> RECORD SEPARATOR '\x1f' # 0x1F -> UNIT SEPARATOR ' ' # 0x20 -> SPACE '!' # 0x21 -> EXCLAMATION MARK '"' # 0x22 -> QUOTATION MARK '#' # 0x23 -> NUMBER SIGN '$' # 0x24 -> DOLLAR SIGN '%' # 0x25 -> PERCENT SIGN '&' # 0x26 -> AMPERSAND "'" # 0x27 -> APOSTROPHE '(' # 0x28 -> LEFT PARENTHESIS ')' # 0x29 -> RIGHT PARENTHESIS '*' # 0x2A -> ASTERISK '+' # 0x2B -> PLUS SIGN ',' # 0x2C -> COMMA '-' # 0x2D -> HYPHEN-MINUS '.' # 0x2E -> FULL STOP '/' # 0x2F -> SOLIDUS '0' # 0x30 -> DIGIT ZERO '1' # 0x31 -> DIGIT ONE '2' # 0x32 -> DIGIT TWO '3' # 0x33 -> DIGIT THREE '4' # 0x34 -> DIGIT FOUR '5' # 0x35 -> DIGIT FIVE '6' # 0x36 -> DIGIT SIX '7' # 0x37 -> DIGIT SEVEN '8' # 0x38 -> DIGIT EIGHT '9' # 0x39 -> DIGIT NINE ':' # 0x3A -> COLON ';' # 0x3B -> SEMICOLON '<' # 0x3C -> LESS-THAN SIGN '=' # 0x3D -> EQUALS SIGN '>' # 0x3E -> GREATER-THAN SIGN '?' # 0x3F -> QUESTION MARK '@' # 0x40 -> COMMERCIAL AT 'A' # 0x41 -> LATIN CAPITAL LETTER A 'B' # 0x42 -> LATIN CAPITAL LETTER B 'C' # 0x43 -> LATIN CAPITAL LETTER C 'D' # 0x44 -> LATIN CAPITAL LETTER D 'E' # 0x45 -> LATIN CAPITAL LETTER E 'F' # 0x46 -> LATIN CAPITAL LETTER F 'G' # 0x47 -> LATIN CAPITAL LETTER G 'H' # 0x48 -> LATIN CAPITAL LETTER H 'I' # 0x49 -> LATIN CAPITAL LETTER I 'J' # 0x4A -> LATIN CAPITAL LETTER J 'K' # 0x4B -> LATIN CAPITAL LETTER K 'L' # 0x4C -> LATIN CAPITAL LETTER L 'M' # 0x4D -> LATIN CAPITAL LETTER M 'N' # 0x4E -> LATIN CAPITAL LETTER N 'O' # 0x4F -> LATIN CAPITAL LETTER O 'P' # 0x50 -> LATIN CAPITAL LETTER P 'Q' # 0x51 -> LATIN CAPITAL LETTER Q 'R' # 0x52 -> LATIN CAPITAL LETTER R 'S' # 0x53 -> LATIN CAPITAL LETTER S 'T' # 0x54 -> LATIN CAPITAL LETTER T 'U' # 0x55 -> LATIN CAPITAL LETTER U 'V' # 0x56 -> LATIN CAPITAL LETTER V 'W' # 0x57 -> LATIN CAPITAL LETTER W 'X' # 0x58 -> LATIN CAPITAL LETTER X 'Y' # 0x59 -> LATIN CAPITAL LETTER Y 'Z' # 0x5A -> LATIN CAPITAL LETTER Z '[' # 0x5B -> LEFT SQUARE BRACKET '\\' # 0x5C -> REVERSE SOLIDUS ']' # 0x5D -> RIGHT SQUARE BRACKET '^' # 0x5E -> CIRCUMFLEX ACCENT '_' # 0x5F -> LOW LINE '`' # 0x60 -> GRAVE ACCENT 'a' # 0x61 -> LATIN SMALL LETTER A 'b' # 0x62 -> LATIN SMALL LETTER B 'c' # 0x63 -> LATIN SMALL LETTER C 'd' # 0x64 -> LATIN SMALL LETTER D 'e' # 0x65 -> LATIN SMALL LETTER E 'f' # 0x66 -> LATIN SMALL LETTER F 'g' # 0x67 -> LATIN SMALL LETTER G 'h' # 0x68 -> LATIN SMALL LETTER H 'i' # 0x69 -> LATIN SMALL LETTER I 'j' # 0x6A -> LATIN SMALL LETTER J 'k' # 0x6B -> LATIN SMALL LETTER K 'l' # 0x6C -> LATIN SMALL LETTER L 'm' # 0x6D -> LATIN SMALL LETTER M 'n' # 0x6E -> LATIN SMALL LETTER N 'o' # 0x6F -> LATIN SMALL LETTER O 'p' # 0x70 -> LATIN SMALL LETTER P 'q' # 0x71 -> LATIN SMALL LETTER Q 'r' # 0x72 -> LATIN SMALL LETTER R 's' # 0x73 -> LATIN SMALL LETTER S 't' # 0x74 -> LATIN SMALL LETTER T 'u' # 0x75 -> LATIN SMALL LETTER U 'v' # 0x76 -> LATIN SMALL LETTER V 'w' # 0x77 -> LATIN SMALL LETTER W 'x' # 0x78 -> LATIN SMALL LETTER X 'y' # 0x79 -> LATIN SMALL LETTER Y 'z' # 0x7A -> LATIN SMALL LETTER Z '{' # 0x7B -> LEFT CURLY BRACKET '|' # 0x7C -> VERTICAL LINE '}' # 0x7D -> RIGHT CURLY BRACKET '~' # 0x7E -> TILDE '\x7f' # 0x7F -> DELETE '\u0402' # 0x80 -> CYRILLIC CAPITAL LETTER DJE '\u0403' # 0x81 -> CYRILLIC CAPITAL LETTER GJE '\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK '\u0453' # 0x83 -> CYRILLIC SMALL LETTER GJE '\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK '\u2026' # 0x85 -> HORIZONTAL ELLIPSIS '\u2020' # 0x86 -> DAGGER '\u2021' # 0x87 -> DOUBLE DAGGER '\u20ac' # 0x88 -> EURO SIGN '\u2030' # 0x89 -> PER MILLE SIGN '\u0409' # 0x8A -> CYRILLIC CAPITAL LETTER LJE '\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK '\u040a' # 0x8C -> CYRILLIC CAPITAL LETTER NJE '\u049a' # 0x8D -> CYRILLIC CAPITAL LETTER KA WITH DESCENDER '\u04ba' # 0x8E -> CYRILLIC CAPITAL LETTER SHHA '\u040f' # 0x8F -> CYRILLIC CAPITAL LETTER DZHE '\u0452' # 0x90 -> CYRILLIC SMALL LETTER DJE '\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK '\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK '\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK '\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK '\u2022' # 0x95 -> BULLET '\u2013' # 0x96 -> EN DASH '\u2014' # 0x97 -> EM DASH '\ufffe' # 0x98 -> UNDEFINED '\u2122' # 0x99 -> TRADE MARK SIGN '\u0459' # 0x9A -> CYRILLIC SMALL LETTER LJE '\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK '\u045a' # 0x9C -> CYRILLIC SMALL LETTER NJE '\u049b' # 0x9D -> CYRILLIC SMALL LETTER KA WITH DESCENDER '\u04bb' # 0x9E -> CYRILLIC SMALL LETTER SHHA '\u045f' # 0x9F -> CYRILLIC SMALL LETTER DZHE '\xa0' # 0xA0 -> NO-BREAK SPACE '\u04b0' # 0xA1 -> CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE '\u04b1' # 0xA2 -> CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE '\u04d8' # 0xA3 -> CYRILLIC CAPITAL LETTER SCHWA '\xa4' # 0xA4 -> CURRENCY SIGN '\u04e8' # 0xA5 -> CYRILLIC CAPITAL LETTER BARRED O '\xa6' # 0xA6 -> BROKEN BAR '\xa7' # 0xA7 -> SECTION SIGN '\u0401' # 0xA8 -> CYRILLIC CAPITAL LETTER IO '\xa9' # 0xA9 -> COPYRIGHT SIGN '\u0492' # 0xAA -> CYRILLIC CAPITAL LETTER GHE WITH STROKE '\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK '\xac' # 0xAC -> NOT SIGN '\xad' # 0xAD -> SOFT HYPHEN '\xae' # 0xAE -> REGISTERED SIGN '\u04ae' # 0xAF -> CYRILLIC CAPITAL LETTER STRAIGHT U '\xb0' # 0xB0 -> DEGREE SIGN '\xb1' # 0xB1 -> PLUS-MINUS SIGN '\u0406' # 0xB2 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I '\u0456' # 0xB3 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I '\u04e9' # 0xB4 -> CYRILLIC SMALL LETTER BARRED O '\xb5' # 0xB5 -> MICRO SIGN '\xb6' # 0xB6 -> PILCROW SIGN '\xb7' # 0xB7 -> MIDDLE DOT '\u0451' # 0xB8 -> CYRILLIC SMALL LETTER IO '\u2116' # 0xB9 -> NUMERO SIGN '\u0493' # 0xBA -> CYRILLIC SMALL LETTER GHE WITH STROKE '\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK '\u04d9' # 0xBC -> CYRILLIC SMALL LETTER SCHWA '\u04a2' # 0xBD -> CYRILLIC CAPITAL LETTER EN WITH DESCENDER '\u04a3' # 0xBE -> CYRILLIC SMALL LETTER EN WITH DESCENDER '\u04af' # 0xBF -> CYRILLIC SMALL LETTER STRAIGHT U '\u0410' # 0xC0 -> CYRILLIC CAPITAL LETTER A '\u0411' # 0xC1 -> CYRILLIC CAPITAL LETTER BE '\u0412' # 0xC2 -> CYRILLIC CAPITAL LETTER VE '\u0413' # 0xC3 -> CYRILLIC CAPITAL LETTER GHE '\u0414' # 0xC4 -> CYRILLIC CAPITAL LETTER DE '\u0415' # 0xC5 -> CYRILLIC CAPITAL LETTER IE '\u0416' # 0xC6 -> CYRILLIC CAPITAL LETTER ZHE '\u0417' # 0xC7 -> CYRILLIC CAPITAL LETTER ZE '\u0418' # 0xC8 -> CYRILLIC CAPITAL LETTER I '\u0419' # 0xC9 -> CYRILLIC CAPITAL LETTER SHORT I '\u041a' # 0xCA -> CYRILLIC CAPITAL LETTER KA '\u041b' # 0xCB -> CYRILLIC CAPITAL LETTER EL '\u041c' # 0xCC -> CYRILLIC CAPITAL LETTER EM '\u041d' # 0xCD -> CYRILLIC CAPITAL LETTER EN '\u041e' # 0xCE -> CYRILLIC CAPITAL LETTER O '\u041f' # 0xCF -> CYRILLIC CAPITAL LETTER PE '\u0420' # 0xD0 -> CYRILLIC CAPITAL LETTER ER '\u0421' # 0xD1 -> CYRILLIC CAPITAL LETTER ES '\u0422' # 0xD2 -> CYRILLIC CAPITAL LETTER TE '\u0423' # 0xD3 -> CYRILLIC CAPITAL LETTER U '\u0424' # 0xD4 -> CYRILLIC CAPITAL LETTER EF '\u0425' # 0xD5 -> CYRILLIC CAPITAL LETTER HA '\u0426' # 0xD6 -> CYRILLIC CAPITAL LETTER TSE '\u0427' # 0xD7 -> CYRILLIC CAPITAL LETTER CHE '\u0428' # 0xD8 -> CYRILLIC CAPITAL LETTER SHA '\u0429' # 0xD9 -> CYRILLIC CAPITAL LETTER SHCHA '\u042a' # 0xDA -> CYRILLIC CAPITAL LETTER HARD SIGN '\u042b' # 0xDB -> CYRILLIC CAPITAL LETTER YERU '\u042c' # 0xDC -> CYRILLIC CAPITAL LETTER SOFT SIGN '\u042d' # 0xDD -> CYRILLIC CAPITAL LETTER E '\u042e' # 0xDE -> CYRILLIC CAPITAL LETTER YU '\u042f' # 0xDF -> CYRILLIC CAPITAL LETTER YA '\u0430' # 0xE0 -> CYRILLIC SMALL LETTER A '\u0431' # 0xE1 -> CYRILLIC SMALL LETTER BE '\u0432' # 0xE2 -> CYRILLIC SMALL LETTER VE '\u0433' # 0xE3 -> CYRILLIC SMALL LETTER GHE '\u0434' # 0xE4 -> CYRILLIC SMALL LETTER DE '\u0435' # 0xE5 -> CYRILLIC SMALL LETTER IE '\u0436' # 0xE6 -> CYRILLIC SMALL LETTER ZHE '\u0437' # 0xE7 -> CYRILLIC SMALL LETTER ZE '\u0438' # 0xE8 -> CYRILLIC SMALL LETTER I '\u0439' # 0xE9 -> CYRILLIC SMALL LETTER SHORT I '\u043a' # 0xEA -> CYRILLIC SMALL LETTER KA '\u043b' # 0xEB -> CYRILLIC SMALL LETTER EL '\u043c' # 0xEC -> CYRILLIC SMALL LETTER EM '\u043d' # 0xED -> CYRILLIC SMALL LETTER EN '\u043e' # 0xEE -> CYRILLIC SMALL LETTER O '\u043f' # 0xEF -> CYRILLIC SMALL LETTER PE '\u0440' # 0xF0 -> CYRILLIC SMALL LETTER ER '\u0441' # 0xF1 -> CYRILLIC SMALL LETTER ES '\u0442' # 0xF2 -> CYRILLIC SMALL LETTER TE '\u0443' # 0xF3 -> CYRILLIC SMALL LETTER U '\u0444' # 0xF4 -> CYRILLIC SMALL LETTER EF '\u0445' # 0xF5 -> CYRILLIC SMALL LETTER HA '\u0446' # 0xF6 -> CYRILLIC SMALL LETTER TSE '\u0447' # 0xF7 -> CYRILLIC SMALL LETTER CHE '\u0448' # 0xF8 -> CYRILLIC SMALL LETTER SHA '\u0449' # 0xF9 -> CYRILLIC SMALL LETTER SHCHA '\u044a' # 0xFA -> CYRILLIC SMALL LETTER HARD SIGN '\u044b' # 0xFB -> CYRILLIC SMALL LETTER YERU '\u044c' # 0xFC -> CYRILLIC SMALL LETTER SOFT SIGN '\u044d' # 0xFD -> CYRILLIC SMALL LETTER E '\u044e' # 0xFE -> CYRILLIC SMALL LETTER YU '\u044f' # 0xFF -> CYRILLIC SMALL LETTER YA ) ### Encoding table encoding_table = codecs.charmap_build(decoding_table) cp861.py 0000644 00000104111 15053241622 0005755 0 ustar 00 """ Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP861.TXT' with gencodec.py. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.charmap_encode(input,self.errors,encoding_map)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.charmap_decode(input,self.errors,decoding_table)[0] class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='cp861', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE 0x008b: 0x00d0, # LATIN CAPITAL LETTER ETH 0x008c: 0x00f0, # LATIN SMALL LETTER ETH 0x008d: 0x00de, # LATIN CAPITAL LETTER THORN 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS 0x0095: 0x00fe, # LATIN SMALL LETTER THORN 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX 0x0097: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE 0x0098: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE 0x009c: 0x00a3, # POUND SIGN 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE 0x009e: 0x20a7, # PESETA SIGN 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE 0x00a4: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE 0x00a5: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE 0x00a6: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE 0x00a7: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE 0x00a8: 0x00bf, # INVERTED QUESTION MARK 0x00a9: 0x2310, # REVERSED NOT SIGN 0x00aa: 0x00ac, # NOT SIGN 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00b0: 0x2591, # LIGHT SHADE 0x00b1: 0x2592, # MEDIUM SHADE 0x00b2: 0x2593, # DARK SHADE 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT 0x00db: 0x2588, # FULL BLOCK 0x00dc: 0x2584, # LOWER HALF BLOCK 0x00dd: 0x258c, # LEFT HALF BLOCK 0x00de: 0x2590, # RIGHT HALF BLOCK 0x00df: 0x2580, # UPPER HALF BLOCK 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA 0x00e3: 0x03c0, # GREEK SMALL LETTER PI 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA 0x00e6: 0x00b5, # MICRO SIGN 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA 0x00ec: 0x221e, # INFINITY 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON 0x00ef: 0x2229, # INTERSECTION 0x00f0: 0x2261, # IDENTICAL TO 0x00f1: 0x00b1, # PLUS-MINUS SIGN 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO 0x00f4: 0x2320, # TOP HALF INTEGRAL 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL 0x00f6: 0x00f7, # DIVISION SIGN 0x00f7: 0x2248, # ALMOST EQUAL TO 0x00f8: 0x00b0, # DEGREE SIGN 0x00f9: 0x2219, # BULLET OPERATOR 0x00fa: 0x00b7, # MIDDLE DOT 0x00fb: 0x221a, # SQUARE ROOT 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N 0x00fd: 0x00b2, # SUPERSCRIPT TWO 0x00fe: 0x25a0, # BLACK SQUARE 0x00ff: 0x00a0, # NO-BREAK SPACE }) ### Decoding Table decoding_table = ( u'\x00' # 0x0000 -> NULL u'\x01' # 0x0001 -> START OF HEADING u'\x02' # 0x0002 -> START OF TEXT u'\x03' # 0x0003 -> END OF TEXT u'\x04' # 0x0004 -> END OF TRANSMISSION u'\x05' # 0x0005 -> ENQUIRY u'\x06' # 0x0006 -> ACKNOWLEDGE u'\x07' # 0x0007 -> BELL u'\x08' # 0x0008 -> BACKSPACE u'\t' # 0x0009 -> HORIZONTAL TABULATION u'\n' # 0x000a -> LINE FEED u'\x0b' # 0x000b -> VERTICAL TABULATION u'\x0c' # 0x000c -> FORM FEED u'\r' # 0x000d -> CARRIAGE RETURN u'\x0e' # 0x000e -> SHIFT OUT u'\x0f' # 0x000f -> SHIFT IN u'\x10' # 0x0010 -> DATA LINK ESCAPE u'\x11' # 0x0011 -> DEVICE CONTROL ONE u'\x12' # 0x0012 -> DEVICE CONTROL TWO u'\x13' # 0x0013 -> DEVICE CONTROL THREE u'\x14' # 0x0014 -> DEVICE CONTROL FOUR u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE u'\x16' # 0x0016 -> SYNCHRONOUS IDLE u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK u'\x18' # 0x0018 -> CANCEL u'\x19' # 0x0019 -> END OF MEDIUM u'\x1a' # 0x001a -> SUBSTITUTE u'\x1b' # 0x001b -> ESCAPE u'\x1c' # 0x001c -> FILE SEPARATOR u'\x1d' # 0x001d -> GROUP SEPARATOR u'\x1e' # 0x001e -> RECORD SEPARATOR u'\x1f' # 0x001f -> UNIT SEPARATOR u' ' # 0x0020 -> SPACE u'!' # 0x0021 -> EXCLAMATION MARK u'"' # 0x0022 -> QUOTATION MARK u'#' # 0x0023 -> NUMBER SIGN u'$' # 0x0024 -> DOLLAR SIGN u'%' # 0x0025 -> PERCENT SIGN u'&' # 0x0026 -> AMPERSAND u"'" # 0x0027 -> APOSTROPHE u'(' # 0x0028 -> LEFT PARENTHESIS u')' # 0x0029 -> RIGHT PARENTHESIS u'*' # 0x002a -> ASTERISK u'+' # 0x002b -> PLUS SIGN u',' # 0x002c -> COMMA u'-' # 0x002d -> HYPHEN-MINUS u'.' # 0x002e -> FULL STOP u'/' # 0x002f -> SOLIDUS u'0' # 0x0030 -> DIGIT ZERO u'1' # 0x0031 -> DIGIT ONE u'2' # 0x0032 -> DIGIT TWO u'3' # 0x0033 -> DIGIT THREE u'4' # 0x0034 -> DIGIT FOUR u'5' # 0x0035 -> DIGIT FIVE u'6' # 0x0036 -> DIGIT SIX u'7' # 0x0037 -> DIGIT SEVEN u'8' # 0x0038 -> DIGIT EIGHT u'9' # 0x0039 -> DIGIT NINE u':' # 0x003a -> COLON u';' # 0x003b -> SEMICOLON u'<' # 0x003c -> LESS-THAN SIGN u'=' # 0x003d -> EQUALS SIGN u'>' # 0x003e -> GREATER-THAN SIGN u'?' # 0x003f -> QUESTION MARK u'@' # 0x0040 -> COMMERCIAL AT u'A' # 0x0041 -> LATIN CAPITAL LETTER A u'B' # 0x0042 -> LATIN CAPITAL LETTER B u'C' # 0x0043 -> LATIN CAPITAL LETTER C u'D' # 0x0044 -> LATIN CAPITAL LETTER D u'E' # 0x0045 -> LATIN CAPITAL LETTER E u'F' # 0x0046 -> LATIN CAPITAL LETTER F u'G' # 0x0047 -> LATIN CAPITAL LETTER G u'H' # 0x0048 -> LATIN CAPITAL LETTER H u'I' # 0x0049 -> LATIN CAPITAL LETTER I u'J' # 0x004a -> LATIN CAPITAL LETTER J u'K' # 0x004b -> LATIN CAPITAL LETTER K u'L' # 0x004c -> LATIN CAPITAL LETTER L u'M' # 0x004d -> LATIN CAPITAL LETTER M u'N' # 0x004e -> LATIN CAPITAL LETTER N u'O' # 0x004f -> LATIN CAPITAL LETTER O u'P' # 0x0050 -> LATIN CAPITAL LETTER P u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q u'R' # 0x0052 -> LATIN CAPITAL LETTER R u'S' # 0x0053 -> LATIN CAPITAL LETTER S u'T' # 0x0054 -> LATIN CAPITAL LETTER T u'U' # 0x0055 -> LATIN CAPITAL LETTER U u'V' # 0x0056 -> LATIN CAPITAL LETTER V u'W' # 0x0057 -> LATIN CAPITAL LETTER W u'X' # 0x0058 -> LATIN CAPITAL LETTER X u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y u'Z' # 0x005a -> LATIN CAPITAL LETTER Z u'[' # 0x005b -> LEFT SQUARE BRACKET u'\\' # 0x005c -> REVERSE SOLIDUS u']' # 0x005d -> RIGHT SQUARE BRACKET u'^' # 0x005e -> CIRCUMFLEX ACCENT u'_' # 0x005f -> LOW LINE u'`' # 0x0060 -> GRAVE ACCENT u'a' # 0x0061 -> LATIN SMALL LETTER A u'b' # 0x0062 -> LATIN SMALL LETTER B u'c' # 0x0063 -> LATIN SMALL LETTER C u'd' # 0x0064 -> LATIN SMALL LETTER D u'e' # 0x0065 -> LATIN SMALL LETTER E u'f' # 0x0066 -> LATIN SMALL LETTER F u'g' # 0x0067 -> LATIN SMALL LETTER G u'h' # 0x0068 -> LATIN SMALL LETTER H u'i' # 0x0069 -> LATIN SMALL LETTER I u'j' # 0x006a -> LATIN SMALL LETTER J u'k' # 0x006b -> LATIN SMALL LETTER K u'l' # 0x006c -> LATIN SMALL LETTER L u'm' # 0x006d -> LATIN SMALL LETTER M u'n' # 0x006e -> LATIN SMALL LETTER N u'o' # 0x006f -> LATIN SMALL LETTER O u'p' # 0x0070 -> LATIN SMALL LETTER P u'q' # 0x0071 -> LATIN SMALL LETTER Q u'r' # 0x0072 -> LATIN SMALL LETTER R u's' # 0x0073 -> LATIN SMALL LETTER S u't' # 0x0074 -> LATIN SMALL LETTER T u'u' # 0x0075 -> LATIN SMALL LETTER U u'v' # 0x0076 -> LATIN SMALL LETTER V u'w' # 0x0077 -> LATIN SMALL LETTER W u'x' # 0x0078 -> LATIN SMALL LETTER X u'y' # 0x0079 -> LATIN SMALL LETTER Y u'z' # 0x007a -> LATIN SMALL LETTER Z u'{' # 0x007b -> LEFT CURLY BRACKET u'|' # 0x007c -> VERTICAL LINE u'}' # 0x007d -> RIGHT CURLY BRACKET u'~' # 0x007e -> TILDE u'\x7f' # 0x007f -> DELETE u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE u'\xd0' # 0x008b -> LATIN CAPITAL LETTER ETH u'\xf0' # 0x008c -> LATIN SMALL LETTER ETH u'\xde' # 0x008d -> LATIN CAPITAL LETTER THORN u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS u'\xfe' # 0x0095 -> LATIN SMALL LETTER THORN u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX u'\xdd' # 0x0097 -> LATIN CAPITAL LETTER Y WITH ACUTE u'\xfd' # 0x0098 -> LATIN SMALL LETTER Y WITH ACUTE u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE u'\xa3' # 0x009c -> POUND SIGN u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE u'\u20a7' # 0x009e -> PESETA SIGN u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE u'\xc1' # 0x00a4 -> LATIN CAPITAL LETTER A WITH ACUTE u'\xcd' # 0x00a5 -> LATIN CAPITAL LETTER I WITH ACUTE u'\xd3' # 0x00a6 -> LATIN CAPITAL LETTER O WITH ACUTE u'\xda' # 0x00a7 -> LATIN CAPITAL LETTER U WITH ACUTE u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK u'\u2310' # 0x00a9 -> REVERSED NOT SIGN u'\xac' # 0x00aa -> NOT SIGN u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK u'\u2591' # 0x00b0 -> LIGHT SHADE u'\u2592' # 0x00b1 -> MEDIUM SHADE u'\u2593' # 0x00b2 -> DARK SHADE u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT u'\u2588' # 0x00db -> FULL BLOCK u'\u2584' # 0x00dc -> LOWER HALF BLOCK u'\u258c' # 0x00dd -> LEFT HALF BLOCK u'\u2590' # 0x00de -> RIGHT HALF BLOCK u'\u2580' # 0x00df -> UPPER HALF BLOCK u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA u'\xb5' # 0x00e6 -> MICRO SIGN u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA u'\u221e' # 0x00ec -> INFINITY u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON u'\u2229' # 0x00ef -> INTERSECTION u'\u2261' # 0x00f0 -> IDENTICAL TO u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL u'\xf7' # 0x00f6 -> DIVISION SIGN u'\u2248' # 0x00f7 -> ALMOST EQUAL TO u'\xb0' # 0x00f8 -> DEGREE SIGN u'\u2219' # 0x00f9 -> BULLET OPERATOR u'\xb7' # 0x00fa -> MIDDLE DOT u'\u221a' # 0x00fb -> SQUARE ROOT u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N u'\xb2' # 0x00fd -> SUPERSCRIPT TWO u'\u25a0' # 0x00fe -> BLACK SQUARE u'\xa0' # 0x00ff -> NO-BREAK SPACE ) ### Encoding Map encoding_map = { 0x0000: 0x0000, # NULL 0x0001: 0x0001, # START OF HEADING 0x0002: 0x0002, # START OF TEXT 0x0003: 0x0003, # END OF TEXT 0x0004: 0x0004, # END OF TRANSMISSION 0x0005: 0x0005, # ENQUIRY 0x0006: 0x0006, # ACKNOWLEDGE 0x0007: 0x0007, # BELL 0x0008: 0x0008, # BACKSPACE 0x0009: 0x0009, # HORIZONTAL TABULATION 0x000a: 0x000a, # LINE FEED 0x000b: 0x000b, # VERTICAL TABULATION 0x000c: 0x000c, # FORM FEED 0x000d: 0x000d, # CARRIAGE RETURN 0x000e: 0x000e, # SHIFT OUT 0x000f: 0x000f, # SHIFT IN 0x0010: 0x0010, # DATA LINK ESCAPE 0x0011: 0x0011, # DEVICE CONTROL ONE 0x0012: 0x0012, # DEVICE CONTROL TWO 0x0013: 0x0013, # DEVICE CONTROL THREE 0x0014: 0x0014, # DEVICE CONTROL FOUR 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE 0x0016: 0x0016, # SYNCHRONOUS IDLE 0x0017: 0x0017, # END OF TRANSMISSION BLOCK 0x0018: 0x0018, # CANCEL 0x0019: 0x0019, # END OF MEDIUM 0x001a: 0x001a, # SUBSTITUTE 0x001b: 0x001b, # ESCAPE 0x001c: 0x001c, # FILE SEPARATOR 0x001d: 0x001d, # GROUP SEPARATOR 0x001e: 0x001e, # RECORD SEPARATOR 0x001f: 0x001f, # UNIT SEPARATOR 0x0020: 0x0020, # SPACE 0x0021: 0x0021, # EXCLAMATION MARK 0x0022: 0x0022, # QUOTATION MARK 0x0023: 0x0023, # NUMBER SIGN 0x0024: 0x0024, # DOLLAR SIGN 0x0025: 0x0025, # PERCENT SIGN 0x0026: 0x0026, # AMPERSAND 0x0027: 0x0027, # APOSTROPHE 0x0028: 0x0028, # LEFT PARENTHESIS 0x0029: 0x0029, # RIGHT PARENTHESIS 0x002a: 0x002a, # ASTERISK 0x002b: 0x002b, # PLUS SIGN 0x002c: 0x002c, # COMMA 0x002d: 0x002d, # HYPHEN-MINUS 0x002e: 0x002e, # FULL STOP 0x002f: 0x002f, # SOLIDUS 0x0030: 0x0030, # DIGIT ZERO 0x0031: 0x0031, # DIGIT ONE 0x0032: 0x0032, # DIGIT TWO 0x0033: 0x0033, # DIGIT THREE 0x0034: 0x0034, # DIGIT FOUR 0x0035: 0x0035, # DIGIT FIVE 0x0036: 0x0036, # DIGIT SIX 0x0037: 0x0037, # DIGIT SEVEN 0x0038: 0x0038, # DIGIT EIGHT 0x0039: 0x0039, # DIGIT NINE 0x003a: 0x003a, # COLON 0x003b: 0x003b, # SEMICOLON 0x003c: 0x003c, # LESS-THAN SIGN 0x003d: 0x003d, # EQUALS SIGN 0x003e: 0x003e, # GREATER-THAN SIGN 0x003f: 0x003f, # QUESTION MARK 0x0040: 0x0040, # COMMERCIAL AT 0x0041: 0x0041, # LATIN CAPITAL LETTER A 0x0042: 0x0042, # LATIN CAPITAL LETTER B 0x0043: 0x0043, # LATIN CAPITAL LETTER C 0x0044: 0x0044, # LATIN CAPITAL LETTER D 0x0045: 0x0045, # LATIN CAPITAL LETTER E 0x0046: 0x0046, # LATIN CAPITAL LETTER F 0x0047: 0x0047, # LATIN CAPITAL LETTER G 0x0048: 0x0048, # LATIN CAPITAL LETTER H 0x0049: 0x0049, # LATIN CAPITAL LETTER I 0x004a: 0x004a, # LATIN CAPITAL LETTER J 0x004b: 0x004b, # LATIN CAPITAL LETTER K 0x004c: 0x004c, # LATIN CAPITAL LETTER L 0x004d: 0x004d, # LATIN CAPITAL LETTER M 0x004e: 0x004e, # LATIN CAPITAL LETTER N 0x004f: 0x004f, # LATIN CAPITAL LETTER O 0x0050: 0x0050, # LATIN CAPITAL LETTER P 0x0051: 0x0051, # LATIN CAPITAL LETTER Q 0x0052: 0x0052, # LATIN CAPITAL LETTER R 0x0053: 0x0053, # LATIN CAPITAL LETTER S 0x0054: 0x0054, # LATIN CAPITAL LETTER T 0x0055: 0x0055, # LATIN CAPITAL LETTER U 0x0056: 0x0056, # LATIN CAPITAL LETTER V 0x0057: 0x0057, # LATIN CAPITAL LETTER W 0x0058: 0x0058, # LATIN CAPITAL LETTER X 0x0059: 0x0059, # LATIN CAPITAL LETTER Y 0x005a: 0x005a, # LATIN CAPITAL LETTER Z 0x005b: 0x005b, # LEFT SQUARE BRACKET 0x005c: 0x005c, # REVERSE SOLIDUS 0x005d: 0x005d, # RIGHT SQUARE BRACKET 0x005e: 0x005e, # CIRCUMFLEX ACCENT 0x005f: 0x005f, # LOW LINE 0x0060: 0x0060, # GRAVE ACCENT 0x0061: 0x0061, # LATIN SMALL LETTER A 0x0062: 0x0062, # LATIN SMALL LETTER B 0x0063: 0x0063, # LATIN SMALL LETTER C 0x0064: 0x0064, # LATIN SMALL LETTER D 0x0065: 0x0065, # LATIN SMALL LETTER E 0x0066: 0x0066, # LATIN SMALL LETTER F 0x0067: 0x0067, # LATIN SMALL LETTER G 0x0068: 0x0068, # LATIN SMALL LETTER H 0x0069: 0x0069, # LATIN SMALL LETTER I 0x006a: 0x006a, # LATIN SMALL LETTER J 0x006b: 0x006b, # LATIN SMALL LETTER K 0x006c: 0x006c, # LATIN SMALL LETTER L 0x006d: 0x006d, # LATIN SMALL LETTER M 0x006e: 0x006e, # LATIN SMALL LETTER N 0x006f: 0x006f, # LATIN SMALL LETTER O 0x0070: 0x0070, # LATIN SMALL LETTER P 0x0071: 0x0071, # LATIN SMALL LETTER Q 0x0072: 0x0072, # LATIN SMALL LETTER R 0x0073: 0x0073, # LATIN SMALL LETTER S 0x0074: 0x0074, # LATIN SMALL LETTER T 0x0075: 0x0075, # LATIN SMALL LETTER U 0x0076: 0x0076, # LATIN SMALL LETTER V 0x0077: 0x0077, # LATIN SMALL LETTER W 0x0078: 0x0078, # LATIN SMALL LETTER X 0x0079: 0x0079, # LATIN SMALL LETTER Y 0x007a: 0x007a, # LATIN SMALL LETTER Z 0x007b: 0x007b, # LEFT CURLY BRACKET 0x007c: 0x007c, # VERTICAL LINE 0x007d: 0x007d, # RIGHT CURLY BRACKET 0x007e: 0x007e, # TILDE 0x007f: 0x007f, # DELETE 0x00a0: 0x00ff, # NO-BREAK SPACE 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK 0x00a3: 0x009c, # POUND SIGN 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00ac: 0x00aa, # NOT SIGN 0x00b0: 0x00f8, # DEGREE SIGN 0x00b1: 0x00f1, # PLUS-MINUS SIGN 0x00b2: 0x00fd, # SUPERSCRIPT TWO 0x00b5: 0x00e6, # MICRO SIGN 0x00b7: 0x00fa, # MIDDLE DOT 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF 0x00bf: 0x00a8, # INVERTED QUESTION MARK 0x00c1: 0x00a4, # LATIN CAPITAL LETTER A WITH ACUTE 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE 0x00cd: 0x00a5, # LATIN CAPITAL LETTER I WITH ACUTE 0x00d0: 0x008b, # LATIN CAPITAL LETTER ETH 0x00d3: 0x00a6, # LATIN CAPITAL LETTER O WITH ACUTE 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE 0x00da: 0x00a7, # LATIN CAPITAL LETTER U WITH ACUTE 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS 0x00dd: 0x0097, # LATIN CAPITAL LETTER Y WITH ACUTE 0x00de: 0x008d, # LATIN CAPITAL LETTER THORN 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE 0x00f0: 0x008c, # LATIN SMALL LETTER ETH 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS 0x00f7: 0x00f6, # DIVISION SIGN 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS 0x00fd: 0x0098, # LATIN SMALL LETTER Y WITH ACUTE 0x00fe: 0x0095, # LATIN SMALL LETTER THORN 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON 0x03c0: 0x00e3, # GREEK SMALL LETTER PI 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N 0x20a7: 0x009e, # PESETA SIGN 0x2219: 0x00f9, # BULLET OPERATOR 0x221a: 0x00fb, # SQUARE ROOT 0x221e: 0x00ec, # INFINITY 0x2229: 0x00ef, # INTERSECTION 0x2248: 0x00f7, # ALMOST EQUAL TO 0x2261: 0x00f0, # IDENTICAL TO 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO 0x2310: 0x00a9, # REVERSED NOT SIGN 0x2320: 0x00f4, # TOP HALF INTEGRAL 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL 0x2580: 0x00df, # UPPER HALF BLOCK 0x2584: 0x00dc, # LOWER HALF BLOCK 0x2588: 0x00db, # FULL BLOCK 0x258c: 0x00dd, # LEFT HALF BLOCK 0x2590: 0x00de, # RIGHT HALF BLOCK 0x2591: 0x00b0, # LIGHT SHADE 0x2592: 0x00b1, # MEDIUM SHADE 0x2593: 0x00b2, # DARK SHADE 0x25a0: 0x00fe, # BLACK SQUARE } hz.py 0000644 00000001763 15053241622 0005546 0 ustar 00 # # hz.py: Python Unicode Codec for HZ # # Written by Hye-Shik Chang <perky@FreeBSD.org> # import _codecs_cn, codecs import _multibytecodec as mbc codec = _codecs_cn.getcodec('hz') class Codec(codecs.Codec): encode = codec.encode decode = codec.decode class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, codecs.IncrementalEncoder): codec = codec class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, codecs.IncrementalDecoder): codec = codec class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): codec = codec class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): codec = codec def getregentry(): return codecs.CodecInfo( name='hz', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) cp865.py 0000644 00000104072 15053241622 0005767 0 ustar 00 """ Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP865.TXT' with gencodec.py. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.charmap_encode(input,self.errors,encoding_map)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.charmap_decode(input,self.errors,decoding_table)[0] class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='cp865', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE 0x009c: 0x00a3, # POUND SIGN 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE 0x009e: 0x20a7, # PESETA SIGN 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR 0x00a8: 0x00bf, # INVERTED QUESTION MARK 0x00a9: 0x2310, # REVERSED NOT SIGN 0x00aa: 0x00ac, # NOT SIGN 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00af: 0x00a4, # CURRENCY SIGN 0x00b0: 0x2591, # LIGHT SHADE 0x00b1: 0x2592, # MEDIUM SHADE 0x00b2: 0x2593, # DARK SHADE 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT 0x00db: 0x2588, # FULL BLOCK 0x00dc: 0x2584, # LOWER HALF BLOCK 0x00dd: 0x258c, # LEFT HALF BLOCK 0x00de: 0x2590, # RIGHT HALF BLOCK 0x00df: 0x2580, # UPPER HALF BLOCK 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA 0x00e3: 0x03c0, # GREEK SMALL LETTER PI 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA 0x00e6: 0x00b5, # MICRO SIGN 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA 0x00ec: 0x221e, # INFINITY 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON 0x00ef: 0x2229, # INTERSECTION 0x00f0: 0x2261, # IDENTICAL TO 0x00f1: 0x00b1, # PLUS-MINUS SIGN 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO 0x00f4: 0x2320, # TOP HALF INTEGRAL 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL 0x00f6: 0x00f7, # DIVISION SIGN 0x00f7: 0x2248, # ALMOST EQUAL TO 0x00f8: 0x00b0, # DEGREE SIGN 0x00f9: 0x2219, # BULLET OPERATOR 0x00fa: 0x00b7, # MIDDLE DOT 0x00fb: 0x221a, # SQUARE ROOT 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N 0x00fd: 0x00b2, # SUPERSCRIPT TWO 0x00fe: 0x25a0, # BLACK SQUARE 0x00ff: 0x00a0, # NO-BREAK SPACE }) ### Decoding Table decoding_table = ( u'\x00' # 0x0000 -> NULL u'\x01' # 0x0001 -> START OF HEADING u'\x02' # 0x0002 -> START OF TEXT u'\x03' # 0x0003 -> END OF TEXT u'\x04' # 0x0004 -> END OF TRANSMISSION u'\x05' # 0x0005 -> ENQUIRY u'\x06' # 0x0006 -> ACKNOWLEDGE u'\x07' # 0x0007 -> BELL u'\x08' # 0x0008 -> BACKSPACE u'\t' # 0x0009 -> HORIZONTAL TABULATION u'\n' # 0x000a -> LINE FEED u'\x0b' # 0x000b -> VERTICAL TABULATION u'\x0c' # 0x000c -> FORM FEED u'\r' # 0x000d -> CARRIAGE RETURN u'\x0e' # 0x000e -> SHIFT OUT u'\x0f' # 0x000f -> SHIFT IN u'\x10' # 0x0010 -> DATA LINK ESCAPE u'\x11' # 0x0011 -> DEVICE CONTROL ONE u'\x12' # 0x0012 -> DEVICE CONTROL TWO u'\x13' # 0x0013 -> DEVICE CONTROL THREE u'\x14' # 0x0014 -> DEVICE CONTROL FOUR u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE u'\x16' # 0x0016 -> SYNCHRONOUS IDLE u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK u'\x18' # 0x0018 -> CANCEL u'\x19' # 0x0019 -> END OF MEDIUM u'\x1a' # 0x001a -> SUBSTITUTE u'\x1b' # 0x001b -> ESCAPE u'\x1c' # 0x001c -> FILE SEPARATOR u'\x1d' # 0x001d -> GROUP SEPARATOR u'\x1e' # 0x001e -> RECORD SEPARATOR u'\x1f' # 0x001f -> UNIT SEPARATOR u' ' # 0x0020 -> SPACE u'!' # 0x0021 -> EXCLAMATION MARK u'"' # 0x0022 -> QUOTATION MARK u'#' # 0x0023 -> NUMBER SIGN u'$' # 0x0024 -> DOLLAR SIGN u'%' # 0x0025 -> PERCENT SIGN u'&' # 0x0026 -> AMPERSAND u"'" # 0x0027 -> APOSTROPHE u'(' # 0x0028 -> LEFT PARENTHESIS u')' # 0x0029 -> RIGHT PARENTHESIS u'*' # 0x002a -> ASTERISK u'+' # 0x002b -> PLUS SIGN u',' # 0x002c -> COMMA u'-' # 0x002d -> HYPHEN-MINUS u'.' # 0x002e -> FULL STOP u'/' # 0x002f -> SOLIDUS u'0' # 0x0030 -> DIGIT ZERO u'1' # 0x0031 -> DIGIT ONE u'2' # 0x0032 -> DIGIT TWO u'3' # 0x0033 -> DIGIT THREE u'4' # 0x0034 -> DIGIT FOUR u'5' # 0x0035 -> DIGIT FIVE u'6' # 0x0036 -> DIGIT SIX u'7' # 0x0037 -> DIGIT SEVEN u'8' # 0x0038 -> DIGIT EIGHT u'9' # 0x0039 -> DIGIT NINE u':' # 0x003a -> COLON u';' # 0x003b -> SEMICOLON u'<' # 0x003c -> LESS-THAN SIGN u'=' # 0x003d -> EQUALS SIGN u'>' # 0x003e -> GREATER-THAN SIGN u'?' # 0x003f -> QUESTION MARK u'@' # 0x0040 -> COMMERCIAL AT u'A' # 0x0041 -> LATIN CAPITAL LETTER A u'B' # 0x0042 -> LATIN CAPITAL LETTER B u'C' # 0x0043 -> LATIN CAPITAL LETTER C u'D' # 0x0044 -> LATIN CAPITAL LETTER D u'E' # 0x0045 -> LATIN CAPITAL LETTER E u'F' # 0x0046 -> LATIN CAPITAL LETTER F u'G' # 0x0047 -> LATIN CAPITAL LETTER G u'H' # 0x0048 -> LATIN CAPITAL LETTER H u'I' # 0x0049 -> LATIN CAPITAL LETTER I u'J' # 0x004a -> LATIN CAPITAL LETTER J u'K' # 0x004b -> LATIN CAPITAL LETTER K u'L' # 0x004c -> LATIN CAPITAL LETTER L u'M' # 0x004d -> LATIN CAPITAL LETTER M u'N' # 0x004e -> LATIN CAPITAL LETTER N u'O' # 0x004f -> LATIN CAPITAL LETTER O u'P' # 0x0050 -> LATIN CAPITAL LETTER P u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q u'R' # 0x0052 -> LATIN CAPITAL LETTER R u'S' # 0x0053 -> LATIN CAPITAL LETTER S u'T' # 0x0054 -> LATIN CAPITAL LETTER T u'U' # 0x0055 -> LATIN CAPITAL LETTER U u'V' # 0x0056 -> LATIN CAPITAL LETTER V u'W' # 0x0057 -> LATIN CAPITAL LETTER W u'X' # 0x0058 -> LATIN CAPITAL LETTER X u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y u'Z' # 0x005a -> LATIN CAPITAL LETTER Z u'[' # 0x005b -> LEFT SQUARE BRACKET u'\\' # 0x005c -> REVERSE SOLIDUS u']' # 0x005d -> RIGHT SQUARE BRACKET u'^' # 0x005e -> CIRCUMFLEX ACCENT u'_' # 0x005f -> LOW LINE u'`' # 0x0060 -> GRAVE ACCENT u'a' # 0x0061 -> LATIN SMALL LETTER A u'b' # 0x0062 -> LATIN SMALL LETTER B u'c' # 0x0063 -> LATIN SMALL LETTER C u'd' # 0x0064 -> LATIN SMALL LETTER D u'e' # 0x0065 -> LATIN SMALL LETTER E u'f' # 0x0066 -> LATIN SMALL LETTER F u'g' # 0x0067 -> LATIN SMALL LETTER G u'h' # 0x0068 -> LATIN SMALL LETTER H u'i' # 0x0069 -> LATIN SMALL LETTER I u'j' # 0x006a -> LATIN SMALL LETTER J u'k' # 0x006b -> LATIN SMALL LETTER K u'l' # 0x006c -> LATIN SMALL LETTER L u'm' # 0x006d -> LATIN SMALL LETTER M u'n' # 0x006e -> LATIN SMALL LETTER N u'o' # 0x006f -> LATIN SMALL LETTER O u'p' # 0x0070 -> LATIN SMALL LETTER P u'q' # 0x0071 -> LATIN SMALL LETTER Q u'r' # 0x0072 -> LATIN SMALL LETTER R u's' # 0x0073 -> LATIN SMALL LETTER S u't' # 0x0074 -> LATIN SMALL LETTER T u'u' # 0x0075 -> LATIN SMALL LETTER U u'v' # 0x0076 -> LATIN SMALL LETTER V u'w' # 0x0077 -> LATIN SMALL LETTER W u'x' # 0x0078 -> LATIN SMALL LETTER X u'y' # 0x0079 -> LATIN SMALL LETTER Y u'z' # 0x007a -> LATIN SMALL LETTER Z u'{' # 0x007b -> LEFT CURLY BRACKET u'|' # 0x007c -> VERTICAL LINE u'}' # 0x007d -> RIGHT CURLY BRACKET u'~' # 0x007e -> TILDE u'\x7f' # 0x007f -> DELETE u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE u'\xff' # 0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE u'\xa3' # 0x009c -> POUND SIGN u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE u'\u20a7' # 0x009e -> PESETA SIGN u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK u'\u2310' # 0x00a9 -> REVERSED NOT SIGN u'\xac' # 0x00aa -> NOT SIGN u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK u'\xa4' # 0x00af -> CURRENCY SIGN u'\u2591' # 0x00b0 -> LIGHT SHADE u'\u2592' # 0x00b1 -> MEDIUM SHADE u'\u2593' # 0x00b2 -> DARK SHADE u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT u'\u2588' # 0x00db -> FULL BLOCK u'\u2584' # 0x00dc -> LOWER HALF BLOCK u'\u258c' # 0x00dd -> LEFT HALF BLOCK u'\u2590' # 0x00de -> RIGHT HALF BLOCK u'\u2580' # 0x00df -> UPPER HALF BLOCK u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA u'\xb5' # 0x00e6 -> MICRO SIGN u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA u'\u221e' # 0x00ec -> INFINITY u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON u'\u2229' # 0x00ef -> INTERSECTION u'\u2261' # 0x00f0 -> IDENTICAL TO u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL u'\xf7' # 0x00f6 -> DIVISION SIGN u'\u2248' # 0x00f7 -> ALMOST EQUAL TO u'\xb0' # 0x00f8 -> DEGREE SIGN u'\u2219' # 0x00f9 -> BULLET OPERATOR u'\xb7' # 0x00fa -> MIDDLE DOT u'\u221a' # 0x00fb -> SQUARE ROOT u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N u'\xb2' # 0x00fd -> SUPERSCRIPT TWO u'\u25a0' # 0x00fe -> BLACK SQUARE u'\xa0' # 0x00ff -> NO-BREAK SPACE ) ### Encoding Map encoding_map = { 0x0000: 0x0000, # NULL 0x0001: 0x0001, # START OF HEADING 0x0002: 0x0002, # START OF TEXT 0x0003: 0x0003, # END OF TEXT 0x0004: 0x0004, # END OF TRANSMISSION 0x0005: 0x0005, # ENQUIRY 0x0006: 0x0006, # ACKNOWLEDGE 0x0007: 0x0007, # BELL 0x0008: 0x0008, # BACKSPACE 0x0009: 0x0009, # HORIZONTAL TABULATION 0x000a: 0x000a, # LINE FEED 0x000b: 0x000b, # VERTICAL TABULATION 0x000c: 0x000c, # FORM FEED 0x000d: 0x000d, # CARRIAGE RETURN 0x000e: 0x000e, # SHIFT OUT 0x000f: 0x000f, # SHIFT IN 0x0010: 0x0010, # DATA LINK ESCAPE 0x0011: 0x0011, # DEVICE CONTROL ONE 0x0012: 0x0012, # DEVICE CONTROL TWO 0x0013: 0x0013, # DEVICE CONTROL THREE 0x0014: 0x0014, # DEVICE CONTROL FOUR 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE 0x0016: 0x0016, # SYNCHRONOUS IDLE 0x0017: 0x0017, # END OF TRANSMISSION BLOCK 0x0018: 0x0018, # CANCEL 0x0019: 0x0019, # END OF MEDIUM 0x001a: 0x001a, # SUBSTITUTE 0x001b: 0x001b, # ESCAPE 0x001c: 0x001c, # FILE SEPARATOR 0x001d: 0x001d, # GROUP SEPARATOR 0x001e: 0x001e, # RECORD SEPARATOR 0x001f: 0x001f, # UNIT SEPARATOR 0x0020: 0x0020, # SPACE 0x0021: 0x0021, # EXCLAMATION MARK 0x0022: 0x0022, # QUOTATION MARK 0x0023: 0x0023, # NUMBER SIGN 0x0024: 0x0024, # DOLLAR SIGN 0x0025: 0x0025, # PERCENT SIGN 0x0026: 0x0026, # AMPERSAND 0x0027: 0x0027, # APOSTROPHE 0x0028: 0x0028, # LEFT PARENTHESIS 0x0029: 0x0029, # RIGHT PARENTHESIS 0x002a: 0x002a, # ASTERISK 0x002b: 0x002b, # PLUS SIGN 0x002c: 0x002c, # COMMA 0x002d: 0x002d, # HYPHEN-MINUS 0x002e: 0x002e, # FULL STOP 0x002f: 0x002f, # SOLIDUS 0x0030: 0x0030, # DIGIT ZERO 0x0031: 0x0031, # DIGIT ONE 0x0032: 0x0032, # DIGIT TWO 0x0033: 0x0033, # DIGIT THREE 0x0034: 0x0034, # DIGIT FOUR 0x0035: 0x0035, # DIGIT FIVE 0x0036: 0x0036, # DIGIT SIX 0x0037: 0x0037, # DIGIT SEVEN 0x0038: 0x0038, # DIGIT EIGHT 0x0039: 0x0039, # DIGIT NINE 0x003a: 0x003a, # COLON 0x003b: 0x003b, # SEMICOLON 0x003c: 0x003c, # LESS-THAN SIGN 0x003d: 0x003d, # EQUALS SIGN 0x003e: 0x003e, # GREATER-THAN SIGN 0x003f: 0x003f, # QUESTION MARK 0x0040: 0x0040, # COMMERCIAL AT 0x0041: 0x0041, # LATIN CAPITAL LETTER A 0x0042: 0x0042, # LATIN CAPITAL LETTER B 0x0043: 0x0043, # LATIN CAPITAL LETTER C 0x0044: 0x0044, # LATIN CAPITAL LETTER D 0x0045: 0x0045, # LATIN CAPITAL LETTER E 0x0046: 0x0046, # LATIN CAPITAL LETTER F 0x0047: 0x0047, # LATIN CAPITAL LETTER G 0x0048: 0x0048, # LATIN CAPITAL LETTER H 0x0049: 0x0049, # LATIN CAPITAL LETTER I 0x004a: 0x004a, # LATIN CAPITAL LETTER J 0x004b: 0x004b, # LATIN CAPITAL LETTER K 0x004c: 0x004c, # LATIN CAPITAL LETTER L 0x004d: 0x004d, # LATIN CAPITAL LETTER M 0x004e: 0x004e, # LATIN CAPITAL LETTER N 0x004f: 0x004f, # LATIN CAPITAL LETTER O 0x0050: 0x0050, # LATIN CAPITAL LETTER P 0x0051: 0x0051, # LATIN CAPITAL LETTER Q 0x0052: 0x0052, # LATIN CAPITAL LETTER R 0x0053: 0x0053, # LATIN CAPITAL LETTER S 0x0054: 0x0054, # LATIN CAPITAL LETTER T 0x0055: 0x0055, # LATIN CAPITAL LETTER U 0x0056: 0x0056, # LATIN CAPITAL LETTER V 0x0057: 0x0057, # LATIN CAPITAL LETTER W 0x0058: 0x0058, # LATIN CAPITAL LETTER X 0x0059: 0x0059, # LATIN CAPITAL LETTER Y 0x005a: 0x005a, # LATIN CAPITAL LETTER Z 0x005b: 0x005b, # LEFT SQUARE BRACKET 0x005c: 0x005c, # REVERSE SOLIDUS 0x005d: 0x005d, # RIGHT SQUARE BRACKET 0x005e: 0x005e, # CIRCUMFLEX ACCENT 0x005f: 0x005f, # LOW LINE 0x0060: 0x0060, # GRAVE ACCENT 0x0061: 0x0061, # LATIN SMALL LETTER A 0x0062: 0x0062, # LATIN SMALL LETTER B 0x0063: 0x0063, # LATIN SMALL LETTER C 0x0064: 0x0064, # LATIN SMALL LETTER D 0x0065: 0x0065, # LATIN SMALL LETTER E 0x0066: 0x0066, # LATIN SMALL LETTER F 0x0067: 0x0067, # LATIN SMALL LETTER G 0x0068: 0x0068, # LATIN SMALL LETTER H 0x0069: 0x0069, # LATIN SMALL LETTER I 0x006a: 0x006a, # LATIN SMALL LETTER J 0x006b: 0x006b, # LATIN SMALL LETTER K 0x006c: 0x006c, # LATIN SMALL LETTER L 0x006d: 0x006d, # LATIN SMALL LETTER M 0x006e: 0x006e, # LATIN SMALL LETTER N 0x006f: 0x006f, # LATIN SMALL LETTER O 0x0070: 0x0070, # LATIN SMALL LETTER P 0x0071: 0x0071, # LATIN SMALL LETTER Q 0x0072: 0x0072, # LATIN SMALL LETTER R 0x0073: 0x0073, # LATIN SMALL LETTER S 0x0074: 0x0074, # LATIN SMALL LETTER T 0x0075: 0x0075, # LATIN SMALL LETTER U 0x0076: 0x0076, # LATIN SMALL LETTER V 0x0077: 0x0077, # LATIN SMALL LETTER W 0x0078: 0x0078, # LATIN SMALL LETTER X 0x0079: 0x0079, # LATIN SMALL LETTER Y 0x007a: 0x007a, # LATIN SMALL LETTER Z 0x007b: 0x007b, # LEFT CURLY BRACKET 0x007c: 0x007c, # VERTICAL LINE 0x007d: 0x007d, # RIGHT CURLY BRACKET 0x007e: 0x007e, # TILDE 0x007f: 0x007f, # DELETE 0x00a0: 0x00ff, # NO-BREAK SPACE 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK 0x00a3: 0x009c, # POUND SIGN 0x00a4: 0x00af, # CURRENCY SIGN 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00ac: 0x00aa, # NOT SIGN 0x00b0: 0x00f8, # DEGREE SIGN 0x00b1: 0x00f1, # PLUS-MINUS SIGN 0x00b2: 0x00fd, # SUPERSCRIPT TWO 0x00b5: 0x00e6, # MICRO SIGN 0x00b7: 0x00fa, # MIDDLE DOT 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF 0x00bf: 0x00a8, # INVERTED QUESTION MARK 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS 0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS 0x00f7: 0x00f6, # DIVISION SIGN 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS 0x00ff: 0x0098, # LATIN SMALL LETTER Y WITH DIAERESIS 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON 0x03c0: 0x00e3, # GREEK SMALL LETTER PI 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N 0x20a7: 0x009e, # PESETA SIGN 0x2219: 0x00f9, # BULLET OPERATOR 0x221a: 0x00fb, # SQUARE ROOT 0x221e: 0x00ec, # INFINITY 0x2229: 0x00ef, # INTERSECTION 0x2248: 0x00f7, # ALMOST EQUAL TO 0x2261: 0x00f0, # IDENTICAL TO 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO 0x2310: 0x00a9, # REVERSED NOT SIGN 0x2320: 0x00f4, # TOP HALF INTEGRAL 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL 0x2580: 0x00df, # UPPER HALF BLOCK 0x2584: 0x00dc, # LOWER HALF BLOCK 0x2588: 0x00db, # FULL BLOCK 0x258c: 0x00dd, # LEFT HALF BLOCK 0x2590: 0x00de, # RIGHT HALF BLOCK 0x2591: 0x00b0, # LIGHT SHADE 0x2592: 0x00b1, # MEDIUM SHADE 0x2593: 0x00b2, # DARK SHADE 0x25a0: 0x00fe, # BLACK SQUARE } cp1255.py 0000644 00000030662 15053241622 0006044 0 ustar 00 """ Python Character Mapping Codec cp1255 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1255.TXT' with gencodec.py. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.charmap_decode(input,self.errors,decoding_table)[0] class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='cp1255', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) ### Decoding Table decoding_table = ( u'\x00' # 0x00 -> NULL u'\x01' # 0x01 -> START OF HEADING u'\x02' # 0x02 -> START OF TEXT u'\x03' # 0x03 -> END OF TEXT u'\x04' # 0x04 -> END OF TRANSMISSION u'\x05' # 0x05 -> ENQUIRY u'\x06' # 0x06 -> ACKNOWLEDGE u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION u'\n' # 0x0A -> LINE FEED u'\x0b' # 0x0B -> VERTICAL TABULATION u'\x0c' # 0x0C -> FORM FEED u'\r' # 0x0D -> CARRIAGE RETURN u'\x0e' # 0x0E -> SHIFT OUT u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO u'\x13' # 0x13 -> DEVICE CONTROL THREE u'\x14' # 0x14 -> DEVICE CONTROL FOUR u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE u'\x16' # 0x16 -> SYNCHRONOUS IDLE u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM u'\x1a' # 0x1A -> SUBSTITUTE u'\x1b' # 0x1B -> ESCAPE u'\x1c' # 0x1C -> FILE SEPARATOR u'\x1d' # 0x1D -> GROUP SEPARATOR u'\x1e' # 0x1E -> RECORD SEPARATOR u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK u'#' # 0x23 -> NUMBER SIGN u'$' # 0x24 -> DOLLAR SIGN u'%' # 0x25 -> PERCENT SIGN u'&' # 0x26 -> AMPERSAND u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS u'*' # 0x2A -> ASTERISK u'+' # 0x2B -> PLUS SIGN u',' # 0x2C -> COMMA u'-' # 0x2D -> HYPHEN-MINUS u'.' # 0x2E -> FULL STOP u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO u'3' # 0x33 -> DIGIT THREE u'4' # 0x34 -> DIGIT FOUR u'5' # 0x35 -> DIGIT FIVE u'6' # 0x36 -> DIGIT SIX u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE u':' # 0x3A -> COLON u';' # 0x3B -> SEMICOLON u'<' # 0x3C -> LESS-THAN SIGN u'=' # 0x3D -> EQUALS SIGN u'>' # 0x3E -> GREATER-THAN SIGN u'?' # 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B u'C' # 0x43 -> LATIN CAPITAL LETTER C u'D' # 0x44 -> LATIN CAPITAL LETTER D u'E' # 0x45 -> LATIN CAPITAL LETTER E u'F' # 0x46 -> LATIN CAPITAL LETTER F u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I u'J' # 0x4A -> LATIN CAPITAL LETTER J u'K' # 0x4B -> LATIN CAPITAL LETTER K u'L' # 0x4C -> LATIN CAPITAL LETTER L u'M' # 0x4D -> LATIN CAPITAL LETTER M u'N' # 0x4E -> LATIN CAPITAL LETTER N u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R u'S' # 0x53 -> LATIN CAPITAL LETTER S u'T' # 0x54 -> LATIN CAPITAL LETTER T u'U' # 0x55 -> LATIN CAPITAL LETTER U u'V' # 0x56 -> LATIN CAPITAL LETTER V u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y u'Z' # 0x5A -> LATIN CAPITAL LETTER Z u'[' # 0x5B -> LEFT SQUARE BRACKET u'\\' # 0x5C -> REVERSE SOLIDUS u']' # 0x5D -> RIGHT SQUARE BRACKET u'^' # 0x5E -> CIRCUMFLEX ACCENT u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B u'c' # 0x63 -> LATIN SMALL LETTER C u'd' # 0x64 -> LATIN SMALL LETTER D u'e' # 0x65 -> LATIN SMALL LETTER E u'f' # 0x66 -> LATIN SMALL LETTER F u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I u'j' # 0x6A -> LATIN SMALL LETTER J u'k' # 0x6B -> LATIN SMALL LETTER K u'l' # 0x6C -> LATIN SMALL LETTER L u'm' # 0x6D -> LATIN SMALL LETTER M u'n' # 0x6E -> LATIN SMALL LETTER N u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R u's' # 0x73 -> LATIN SMALL LETTER S u't' # 0x74 -> LATIN SMALL LETTER T u'u' # 0x75 -> LATIN SMALL LETTER U u'v' # 0x76 -> LATIN SMALL LETTER V u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y u'z' # 0x7A -> LATIN SMALL LETTER Z u'{' # 0x7B -> LEFT CURLY BRACKET u'|' # 0x7C -> VERTICAL LINE u'}' # 0x7D -> RIGHT CURLY BRACKET u'~' # 0x7E -> TILDE u'\x7f' # 0x7F -> DELETE u'\u20ac' # 0x80 -> EURO SIGN u'\ufffe' # 0x81 -> UNDEFINED u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS u'\u2020' # 0x86 -> DAGGER u'\u2021' # 0x87 -> DOUBLE DAGGER u'\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT u'\u2030' # 0x89 -> PER MILLE SIGN u'\ufffe' # 0x8A -> UNDEFINED u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK u'\ufffe' # 0x8C -> UNDEFINED u'\ufffe' # 0x8D -> UNDEFINED u'\ufffe' # 0x8E -> UNDEFINED u'\ufffe' # 0x8F -> UNDEFINED u'\ufffe' # 0x90 -> UNDEFINED u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK u'\u2022' # 0x95 -> BULLET u'\u2013' # 0x96 -> EN DASH u'\u2014' # 0x97 -> EM DASH u'\u02dc' # 0x98 -> SMALL TILDE u'\u2122' # 0x99 -> TRADE MARK SIGN u'\ufffe' # 0x9A -> UNDEFINED u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK u'\ufffe' # 0x9C -> UNDEFINED u'\ufffe' # 0x9D -> UNDEFINED u'\ufffe' # 0x9E -> UNDEFINED u'\ufffe' # 0x9F -> UNDEFINED u'\xa0' # 0xA0 -> NO-BREAK SPACE u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK u'\xa2' # 0xA2 -> CENT SIGN u'\xa3' # 0xA3 -> POUND SIGN u'\u20aa' # 0xA4 -> NEW SHEQEL SIGN u'\xa5' # 0xA5 -> YEN SIGN u'\xa6' # 0xA6 -> BROKEN BAR u'\xa7' # 0xA7 -> SECTION SIGN u'\xa8' # 0xA8 -> DIAERESIS u'\xa9' # 0xA9 -> COPYRIGHT SIGN u'\xd7' # 0xAA -> MULTIPLICATION SIGN u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK u'\xac' # 0xAC -> NOT SIGN u'\xad' # 0xAD -> SOFT HYPHEN u'\xae' # 0xAE -> REGISTERED SIGN u'\xaf' # 0xAF -> MACRON u'\xb0' # 0xB0 -> DEGREE SIGN u'\xb1' # 0xB1 -> PLUS-MINUS SIGN u'\xb2' # 0xB2 -> SUPERSCRIPT TWO u'\xb3' # 0xB3 -> SUPERSCRIPT THREE u'\xb4' # 0xB4 -> ACUTE ACCENT u'\xb5' # 0xB5 -> MICRO SIGN u'\xb6' # 0xB6 -> PILCROW SIGN u'\xb7' # 0xB7 -> MIDDLE DOT u'\xb8' # 0xB8 -> CEDILLA u'\xb9' # 0xB9 -> SUPERSCRIPT ONE u'\xf7' # 0xBA -> DIVISION SIGN u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS u'\xbf' # 0xBF -> INVERTED QUESTION MARK u'\u05b0' # 0xC0 -> HEBREW POINT SHEVA u'\u05b1' # 0xC1 -> HEBREW POINT HATAF SEGOL u'\u05b2' # 0xC2 -> HEBREW POINT HATAF PATAH u'\u05b3' # 0xC3 -> HEBREW POINT HATAF QAMATS u'\u05b4' # 0xC4 -> HEBREW POINT HIRIQ u'\u05b5' # 0xC5 -> HEBREW POINT TSERE u'\u05b6' # 0xC6 -> HEBREW POINT SEGOL u'\u05b7' # 0xC7 -> HEBREW POINT PATAH u'\u05b8' # 0xC8 -> HEBREW POINT QAMATS u'\u05b9' # 0xC9 -> HEBREW POINT HOLAM u'\ufffe' # 0xCA -> UNDEFINED u'\u05bb' # 0xCB -> HEBREW POINT QUBUTS u'\u05bc' # 0xCC -> HEBREW POINT DAGESH OR MAPIQ u'\u05bd' # 0xCD -> HEBREW POINT METEG u'\u05be' # 0xCE -> HEBREW PUNCTUATION MAQAF u'\u05bf' # 0xCF -> HEBREW POINT RAFE u'\u05c0' # 0xD0 -> HEBREW PUNCTUATION PASEQ u'\u05c1' # 0xD1 -> HEBREW POINT SHIN DOT u'\u05c2' # 0xD2 -> HEBREW POINT SIN DOT u'\u05c3' # 0xD3 -> HEBREW PUNCTUATION SOF PASUQ u'\u05f0' # 0xD4 -> HEBREW LIGATURE YIDDISH DOUBLE VAV u'\u05f1' # 0xD5 -> HEBREW LIGATURE YIDDISH VAV YOD u'\u05f2' # 0xD6 -> HEBREW LIGATURE YIDDISH DOUBLE YOD u'\u05f3' # 0xD7 -> HEBREW PUNCTUATION GERESH u'\u05f4' # 0xD8 -> HEBREW PUNCTUATION GERSHAYIM u'\ufffe' # 0xD9 -> UNDEFINED u'\ufffe' # 0xDA -> UNDEFINED u'\ufffe' # 0xDB -> UNDEFINED u'\ufffe' # 0xDC -> UNDEFINED u'\ufffe' # 0xDD -> UNDEFINED u'\ufffe' # 0xDE -> UNDEFINED u'\ufffe' # 0xDF -> UNDEFINED u'\u05d0' # 0xE0 -> HEBREW LETTER ALEF u'\u05d1' # 0xE1 -> HEBREW LETTER BET u'\u05d2' # 0xE2 -> HEBREW LETTER GIMEL u'\u05d3' # 0xE3 -> HEBREW LETTER DALET u'\u05d4' # 0xE4 -> HEBREW LETTER HE u'\u05d5' # 0xE5 -> HEBREW LETTER VAV u'\u05d6' # 0xE6 -> HEBREW LETTER ZAYIN u'\u05d7' # 0xE7 -> HEBREW LETTER HET u'\u05d8' # 0xE8 -> HEBREW LETTER TET u'\u05d9' # 0xE9 -> HEBREW LETTER YOD u'\u05da' # 0xEA -> HEBREW LETTER FINAL KAF u'\u05db' # 0xEB -> HEBREW LETTER KAF u'\u05dc' # 0xEC -> HEBREW LETTER LAMED u'\u05dd' # 0xED -> HEBREW LETTER FINAL MEM u'\u05de' # 0xEE -> HEBREW LETTER MEM u'\u05df' # 0xEF -> HEBREW LETTER FINAL NUN u'\u05e0' # 0xF0 -> HEBREW LETTER NUN u'\u05e1' # 0xF1 -> HEBREW LETTER SAMEKH u'\u05e2' # 0xF2 -> HEBREW LETTER AYIN u'\u05e3' # 0xF3 -> HEBREW LETTER FINAL PE u'\u05e4' # 0xF4 -> HEBREW LETTER PE u'\u05e5' # 0xF5 -> HEBREW LETTER FINAL TSADI u'\u05e6' # 0xF6 -> HEBREW LETTER TSADI u'\u05e7' # 0xF7 -> HEBREW LETTER QOF u'\u05e8' # 0xF8 -> HEBREW LETTER RESH u'\u05e9' # 0xF9 -> HEBREW LETTER SHIN u'\u05ea' # 0xFA -> HEBREW LETTER TAV u'\ufffe' # 0xFB -> UNDEFINED u'\ufffe' # 0xFC -> UNDEFINED u'\u200e' # 0xFD -> LEFT-TO-RIGHT MARK u'\u200f' # 0xFE -> RIGHT-TO-LEFT MARK u'\ufffe' # 0xFF -> UNDEFINED ) ### Encoding table encoding_table=codecs.charmap_build(decoding_table) iso8859_16.py 0000644 00000032765 15053241622 0006571 0 ustar 00 """ Python Character Mapping Codec iso8859_16 generated from 'MAPPINGS/ISO8859/8859-16.TXT' with gencodec.py. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.charmap_decode(input,self.errors,decoding_table)[0] class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='iso8859-16', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) ### Decoding Table decoding_table = ( u'\x00' # 0x00 -> NULL u'\x01' # 0x01 -> START OF HEADING u'\x02' # 0x02 -> START OF TEXT u'\x03' # 0x03 -> END OF TEXT u'\x04' # 0x04 -> END OF TRANSMISSION u'\x05' # 0x05 -> ENQUIRY u'\x06' # 0x06 -> ACKNOWLEDGE u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION u'\n' # 0x0A -> LINE FEED u'\x0b' # 0x0B -> VERTICAL TABULATION u'\x0c' # 0x0C -> FORM FEED u'\r' # 0x0D -> CARRIAGE RETURN u'\x0e' # 0x0E -> SHIFT OUT u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO u'\x13' # 0x13 -> DEVICE CONTROL THREE u'\x14' # 0x14 -> DEVICE CONTROL FOUR u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE u'\x16' # 0x16 -> SYNCHRONOUS IDLE u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM u'\x1a' # 0x1A -> SUBSTITUTE u'\x1b' # 0x1B -> ESCAPE u'\x1c' # 0x1C -> FILE SEPARATOR u'\x1d' # 0x1D -> GROUP SEPARATOR u'\x1e' # 0x1E -> RECORD SEPARATOR u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK u'#' # 0x23 -> NUMBER SIGN u'$' # 0x24 -> DOLLAR SIGN u'%' # 0x25 -> PERCENT SIGN u'&' # 0x26 -> AMPERSAND u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS u'*' # 0x2A -> ASTERISK u'+' # 0x2B -> PLUS SIGN u',' # 0x2C -> COMMA u'-' # 0x2D -> HYPHEN-MINUS u'.' # 0x2E -> FULL STOP u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO u'3' # 0x33 -> DIGIT THREE u'4' # 0x34 -> DIGIT FOUR u'5' # 0x35 -> DIGIT FIVE u'6' # 0x36 -> DIGIT SIX u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE u':' # 0x3A -> COLON u';' # 0x3B -> SEMICOLON u'<' # 0x3C -> LESS-THAN SIGN u'=' # 0x3D -> EQUALS SIGN u'>' # 0x3E -> GREATER-THAN SIGN u'?' # 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B u'C' # 0x43 -> LATIN CAPITAL LETTER C u'D' # 0x44 -> LATIN CAPITAL LETTER D u'E' # 0x45 -> LATIN CAPITAL LETTER E u'F' # 0x46 -> LATIN CAPITAL LETTER F u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I u'J' # 0x4A -> LATIN CAPITAL LETTER J u'K' # 0x4B -> LATIN CAPITAL LETTER K u'L' # 0x4C -> LATIN CAPITAL LETTER L u'M' # 0x4D -> LATIN CAPITAL LETTER M u'N' # 0x4E -> LATIN CAPITAL LETTER N u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R u'S' # 0x53 -> LATIN CAPITAL LETTER S u'T' # 0x54 -> LATIN CAPITAL LETTER T u'U' # 0x55 -> LATIN CAPITAL LETTER U u'V' # 0x56 -> LATIN CAPITAL LETTER V u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y u'Z' # 0x5A -> LATIN CAPITAL LETTER Z u'[' # 0x5B -> LEFT SQUARE BRACKET u'\\' # 0x5C -> REVERSE SOLIDUS u']' # 0x5D -> RIGHT SQUARE BRACKET u'^' # 0x5E -> CIRCUMFLEX ACCENT u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B u'c' # 0x63 -> LATIN SMALL LETTER C u'd' # 0x64 -> LATIN SMALL LETTER D u'e' # 0x65 -> LATIN SMALL LETTER E u'f' # 0x66 -> LATIN SMALL LETTER F u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I u'j' # 0x6A -> LATIN SMALL LETTER J u'k' # 0x6B -> LATIN SMALL LETTER K u'l' # 0x6C -> LATIN SMALL LETTER L u'm' # 0x6D -> LATIN SMALL LETTER M u'n' # 0x6E -> LATIN SMALL LETTER N u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R u's' # 0x73 -> LATIN SMALL LETTER S u't' # 0x74 -> LATIN SMALL LETTER T u'u' # 0x75 -> LATIN SMALL LETTER U u'v' # 0x76 -> LATIN SMALL LETTER V u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y u'z' # 0x7A -> LATIN SMALL LETTER Z u'{' # 0x7B -> LEFT CURLY BRACKET u'|' # 0x7C -> VERTICAL LINE u'}' # 0x7D -> RIGHT CURLY BRACKET u'~' # 0x7E -> TILDE u'\x7f' # 0x7F -> DELETE u'\x80' # 0x80 -> <control> u'\x81' # 0x81 -> <control> u'\x82' # 0x82 -> <control> u'\x83' # 0x83 -> <control> u'\x84' # 0x84 -> <control> u'\x85' # 0x85 -> <control> u'\x86' # 0x86 -> <control> u'\x87' # 0x87 -> <control> u'\x88' # 0x88 -> <control> u'\x89' # 0x89 -> <control> u'\x8a' # 0x8A -> <control> u'\x8b' # 0x8B -> <control> u'\x8c' # 0x8C -> <control> u'\x8d' # 0x8D -> <control> u'\x8e' # 0x8E -> <control> u'\x8f' # 0x8F -> <control> u'\x90' # 0x90 -> <control> u'\x91' # 0x91 -> <control> u'\x92' # 0x92 -> <control> u'\x93' # 0x93 -> <control> u'\x94' # 0x94 -> <control> u'\x95' # 0x95 -> <control> u'\x96' # 0x96 -> <control> u'\x97' # 0x97 -> <control> u'\x98' # 0x98 -> <control> u'\x99' # 0x99 -> <control> u'\x9a' # 0x9A -> <control> u'\x9b' # 0x9B -> <control> u'\x9c' # 0x9C -> <control> u'\x9d' # 0x9D -> <control> u'\x9e' # 0x9E -> <control> u'\x9f' # 0x9F -> <control> u'\xa0' # 0xA0 -> NO-BREAK SPACE u'\u0104' # 0xA1 -> LATIN CAPITAL LETTER A WITH OGONEK u'\u0105' # 0xA2 -> LATIN SMALL LETTER A WITH OGONEK u'\u0141' # 0xA3 -> LATIN CAPITAL LETTER L WITH STROKE u'\u20ac' # 0xA4 -> EURO SIGN u'\u201e' # 0xA5 -> DOUBLE LOW-9 QUOTATION MARK u'\u0160' # 0xA6 -> LATIN CAPITAL LETTER S WITH CARON u'\xa7' # 0xA7 -> SECTION SIGN u'\u0161' # 0xA8 -> LATIN SMALL LETTER S WITH CARON u'\xa9' # 0xA9 -> COPYRIGHT SIGN u'\u0218' # 0xAA -> LATIN CAPITAL LETTER S WITH COMMA BELOW u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK u'\u0179' # 0xAC -> LATIN CAPITAL LETTER Z WITH ACUTE u'\xad' # 0xAD -> SOFT HYPHEN u'\u017a' # 0xAE -> LATIN SMALL LETTER Z WITH ACUTE u'\u017b' # 0xAF -> LATIN CAPITAL LETTER Z WITH DOT ABOVE u'\xb0' # 0xB0 -> DEGREE SIGN u'\xb1' # 0xB1 -> PLUS-MINUS SIGN u'\u010c' # 0xB2 -> LATIN CAPITAL LETTER C WITH CARON u'\u0142' # 0xB3 -> LATIN SMALL LETTER L WITH STROKE u'\u017d' # 0xB4 -> LATIN CAPITAL LETTER Z WITH CARON u'\u201d' # 0xB5 -> RIGHT DOUBLE QUOTATION MARK u'\xb6' # 0xB6 -> PILCROW SIGN u'\xb7' # 0xB7 -> MIDDLE DOT u'\u017e' # 0xB8 -> LATIN SMALL LETTER Z WITH CARON u'\u010d' # 0xB9 -> LATIN SMALL LETTER C WITH CARON u'\u0219' # 0xBA -> LATIN SMALL LETTER S WITH COMMA BELOW u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK u'\u0152' # 0xBC -> LATIN CAPITAL LIGATURE OE u'\u0153' # 0xBD -> LATIN SMALL LIGATURE OE u'\u0178' # 0xBE -> LATIN CAPITAL LETTER Y WITH DIAERESIS u'\u017c' # 0xBF -> LATIN SMALL LETTER Z WITH DOT ABOVE u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX u'\u0102' # 0xC3 -> LATIN CAPITAL LETTER A WITH BREVE u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS u'\u0106' # 0xC5 -> LATIN CAPITAL LETTER C WITH ACUTE u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE u'\u0143' # 0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX u'\u0150' # 0xD5 -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS u'\u015a' # 0xD7 -> LATIN CAPITAL LETTER S WITH ACUTE u'\u0170' # 0xD8 -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS u'\u0118' # 0xDD -> LATIN CAPITAL LETTER E WITH OGONEK u'\u021a' # 0xDE -> LATIN CAPITAL LETTER T WITH COMMA BELOW u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX u'\u0103' # 0xE3 -> LATIN SMALL LETTER A WITH BREVE u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS u'\u0107' # 0xE5 -> LATIN SMALL LETTER C WITH ACUTE u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE u'\u0144' # 0xF1 -> LATIN SMALL LETTER N WITH ACUTE u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX u'\u0151' # 0xF5 -> LATIN SMALL LETTER O WITH DOUBLE ACUTE u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS u'\u015b' # 0xF7 -> LATIN SMALL LETTER S WITH ACUTE u'\u0171' # 0xF8 -> LATIN SMALL LETTER U WITH DOUBLE ACUTE u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS u'\u0119' # 0xFD -> LATIN SMALL LETTER E WITH OGONEK u'\u021b' # 0xFE -> LATIN SMALL LETTER T WITH COMMA BELOW u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS ) ### Encoding table encoding_table=codecs.charmap_build(decoding_table) iso2022_jp_1.py 0000644 00000002045 15053241622 0007130 0 ustar 00 # # iso2022_jp_1.py: Python Unicode Codec for ISO2022_JP_1 # # Written by Hye-Shik Chang <perky@FreeBSD.org> # import _codecs_iso2022, codecs import _multibytecodec as mbc codec = _codecs_iso2022.getcodec('iso2022_jp_1') class Codec(codecs.Codec): encode = codec.encode decode = codec.decode class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, codecs.IncrementalEncoder): codec = codec class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, codecs.IncrementalDecoder): codec = codec class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): codec = codec class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): codec = codec def getregentry(): return codecs.CodecInfo( name='iso2022_jp_1', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) iso8859_6.py 0000644 00000025521 15053241622 0006500 0 ustar 00 """ Python Character Mapping Codec iso8859_6 generated from 'MAPPINGS/ISO8859/8859-6.TXT' with gencodec.py. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.charmap_decode(input,self.errors,decoding_table)[0] class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='iso8859-6', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) ### Decoding Table decoding_table = ( u'\x00' # 0x00 -> NULL u'\x01' # 0x01 -> START OF HEADING u'\x02' # 0x02 -> START OF TEXT u'\x03' # 0x03 -> END OF TEXT u'\x04' # 0x04 -> END OF TRANSMISSION u'\x05' # 0x05 -> ENQUIRY u'\x06' # 0x06 -> ACKNOWLEDGE u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION u'\n' # 0x0A -> LINE FEED u'\x0b' # 0x0B -> VERTICAL TABULATION u'\x0c' # 0x0C -> FORM FEED u'\r' # 0x0D -> CARRIAGE RETURN u'\x0e' # 0x0E -> SHIFT OUT u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO u'\x13' # 0x13 -> DEVICE CONTROL THREE u'\x14' # 0x14 -> DEVICE CONTROL FOUR u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE u'\x16' # 0x16 -> SYNCHRONOUS IDLE u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM u'\x1a' # 0x1A -> SUBSTITUTE u'\x1b' # 0x1B -> ESCAPE u'\x1c' # 0x1C -> FILE SEPARATOR u'\x1d' # 0x1D -> GROUP SEPARATOR u'\x1e' # 0x1E -> RECORD SEPARATOR u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK u'#' # 0x23 -> NUMBER SIGN u'$' # 0x24 -> DOLLAR SIGN u'%' # 0x25 -> PERCENT SIGN u'&' # 0x26 -> AMPERSAND u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS u'*' # 0x2A -> ASTERISK u'+' # 0x2B -> PLUS SIGN u',' # 0x2C -> COMMA u'-' # 0x2D -> HYPHEN-MINUS u'.' # 0x2E -> FULL STOP u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO u'3' # 0x33 -> DIGIT THREE u'4' # 0x34 -> DIGIT FOUR u'5' # 0x35 -> DIGIT FIVE u'6' # 0x36 -> DIGIT SIX u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE u':' # 0x3A -> COLON u';' # 0x3B -> SEMICOLON u'<' # 0x3C -> LESS-THAN SIGN u'=' # 0x3D -> EQUALS SIGN u'>' # 0x3E -> GREATER-THAN SIGN u'?' # 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B u'C' # 0x43 -> LATIN CAPITAL LETTER C u'D' # 0x44 -> LATIN CAPITAL LETTER D u'E' # 0x45 -> LATIN CAPITAL LETTER E u'F' # 0x46 -> LATIN CAPITAL LETTER F u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I u'J' # 0x4A -> LATIN CAPITAL LETTER J u'K' # 0x4B -> LATIN CAPITAL LETTER K u'L' # 0x4C -> LATIN CAPITAL LETTER L u'M' # 0x4D -> LATIN CAPITAL LETTER M u'N' # 0x4E -> LATIN CAPITAL LETTER N u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R u'S' # 0x53 -> LATIN CAPITAL LETTER S u'T' # 0x54 -> LATIN CAPITAL LETTER T u'U' # 0x55 -> LATIN CAPITAL LETTER U u'V' # 0x56 -> LATIN CAPITAL LETTER V u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y u'Z' # 0x5A -> LATIN CAPITAL LETTER Z u'[' # 0x5B -> LEFT SQUARE BRACKET u'\\' # 0x5C -> REVERSE SOLIDUS u']' # 0x5D -> RIGHT SQUARE BRACKET u'^' # 0x5E -> CIRCUMFLEX ACCENT u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B u'c' # 0x63 -> LATIN SMALL LETTER C u'd' # 0x64 -> LATIN SMALL LETTER D u'e' # 0x65 -> LATIN SMALL LETTER E u'f' # 0x66 -> LATIN SMALL LETTER F u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I u'j' # 0x6A -> LATIN SMALL LETTER J u'k' # 0x6B -> LATIN SMALL LETTER K u'l' # 0x6C -> LATIN SMALL LETTER L u'm' # 0x6D -> LATIN SMALL LETTER M u'n' # 0x6E -> LATIN SMALL LETTER N u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R u's' # 0x73 -> LATIN SMALL LETTER S u't' # 0x74 -> LATIN SMALL LETTER T u'u' # 0x75 -> LATIN SMALL LETTER U u'v' # 0x76 -> LATIN SMALL LETTER V u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y u'z' # 0x7A -> LATIN SMALL LETTER Z u'{' # 0x7B -> LEFT CURLY BRACKET u'|' # 0x7C -> VERTICAL LINE u'}' # 0x7D -> RIGHT CURLY BRACKET u'~' # 0x7E -> TILDE u'\x7f' # 0x7F -> DELETE u'\x80' # 0x80 -> <control> u'\x81' # 0x81 -> <control> u'\x82' # 0x82 -> <control> u'\x83' # 0x83 -> <control> u'\x84' # 0x84 -> <control> u'\x85' # 0x85 -> <control> u'\x86' # 0x86 -> <control> u'\x87' # 0x87 -> <control> u'\x88' # 0x88 -> <control> u'\x89' # 0x89 -> <control> u'\x8a' # 0x8A -> <control> u'\x8b' # 0x8B -> <control> u'\x8c' # 0x8C -> <control> u'\x8d' # 0x8D -> <control> u'\x8e' # 0x8E -> <control> u'\x8f' # 0x8F -> <control> u'\x90' # 0x90 -> <control> u'\x91' # 0x91 -> <control> u'\x92' # 0x92 -> <control> u'\x93' # 0x93 -> <control> u'\x94' # 0x94 -> <control> u'\x95' # 0x95 -> <control> u'\x96' # 0x96 -> <control> u'\x97' # 0x97 -> <control> u'\x98' # 0x98 -> <control> u'\x99' # 0x99 -> <control> u'\x9a' # 0x9A -> <control> u'\x9b' # 0x9B -> <control> u'\x9c' # 0x9C -> <control> u'\x9d' # 0x9D -> <control> u'\x9e' # 0x9E -> <control> u'\x9f' # 0x9F -> <control> u'\xa0' # 0xA0 -> NO-BREAK SPACE u'\ufffe' u'\ufffe' u'\ufffe' u'\xa4' # 0xA4 -> CURRENCY SIGN u'\ufffe' u'\ufffe' u'\ufffe' u'\ufffe' u'\ufffe' u'\ufffe' u'\ufffe' u'\u060c' # 0xAC -> ARABIC COMMA u'\xad' # 0xAD -> SOFT HYPHEN u'\ufffe' u'\ufffe' u'\ufffe' u'\ufffe' u'\ufffe' u'\ufffe' u'\ufffe' u'\ufffe' u'\ufffe' u'\ufffe' u'\ufffe' u'\ufffe' u'\ufffe' u'\u061b' # 0xBB -> ARABIC SEMICOLON u'\ufffe' u'\ufffe' u'\ufffe' u'\u061f' # 0xBF -> ARABIC QUESTION MARK u'\ufffe' u'\u0621' # 0xC1 -> ARABIC LETTER HAMZA u'\u0622' # 0xC2 -> ARABIC LETTER ALEF WITH MADDA ABOVE u'\u0623' # 0xC3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE u'\u0624' # 0xC4 -> ARABIC LETTER WAW WITH HAMZA ABOVE u'\u0625' # 0xC5 -> ARABIC LETTER ALEF WITH HAMZA BELOW u'\u0626' # 0xC6 -> ARABIC LETTER YEH WITH HAMZA ABOVE u'\u0627' # 0xC7 -> ARABIC LETTER ALEF u'\u0628' # 0xC8 -> ARABIC LETTER BEH u'\u0629' # 0xC9 -> ARABIC LETTER TEH MARBUTA u'\u062a' # 0xCA -> ARABIC LETTER TEH u'\u062b' # 0xCB -> ARABIC LETTER THEH u'\u062c' # 0xCC -> ARABIC LETTER JEEM u'\u062d' # 0xCD -> ARABIC LETTER HAH u'\u062e' # 0xCE -> ARABIC LETTER KHAH u'\u062f' # 0xCF -> ARABIC LETTER DAL u'\u0630' # 0xD0 -> ARABIC LETTER THAL u'\u0631' # 0xD1 -> ARABIC LETTER REH u'\u0632' # 0xD2 -> ARABIC LETTER ZAIN u'\u0633' # 0xD3 -> ARABIC LETTER SEEN u'\u0634' # 0xD4 -> ARABIC LETTER SHEEN u'\u0635' # 0xD5 -> ARABIC LETTER SAD u'\u0636' # 0xD6 -> ARABIC LETTER DAD u'\u0637' # 0xD7 -> ARABIC LETTER TAH u'\u0638' # 0xD8 -> ARABIC LETTER ZAH u'\u0639' # 0xD9 -> ARABIC LETTER AIN u'\u063a' # 0xDA -> ARABIC LETTER GHAIN u'\ufffe' u'\ufffe' u'\ufffe' u'\ufffe' u'\ufffe' u'\u0640' # 0xE0 -> ARABIC TATWEEL u'\u0641' # 0xE1 -> ARABIC LETTER FEH u'\u0642' # 0xE2 -> ARABIC LETTER QAF u'\u0643' # 0xE3 -> ARABIC LETTER KAF u'\u0644' # 0xE4 -> ARABIC LETTER LAM u'\u0645' # 0xE5 -> ARABIC LETTER MEEM u'\u0646' # 0xE6 -> ARABIC LETTER NOON u'\u0647' # 0xE7 -> ARABIC LETTER HEH u'\u0648' # 0xE8 -> ARABIC LETTER WAW u'\u0649' # 0xE9 -> ARABIC LETTER ALEF MAKSURA u'\u064a' # 0xEA -> ARABIC LETTER YEH u'\u064b' # 0xEB -> ARABIC FATHATAN u'\u064c' # 0xEC -> ARABIC DAMMATAN u'\u064d' # 0xED -> ARABIC KASRATAN u'\u064e' # 0xEE -> ARABIC FATHA u'\u064f' # 0xEF -> ARABIC DAMMA u'\u0650' # 0xF0 -> ARABIC KASRA u'\u0651' # 0xF1 -> ARABIC SHADDA u'\u0652' # 0xF2 -> ARABIC SUKUN u'\ufffe' u'\ufffe' u'\ufffe' u'\ufffe' u'\ufffe' u'\ufffe' u'\ufffe' u'\ufffe' u'\ufffe' u'\ufffe' u'\ufffe' u'\ufffe' u'\ufffe' ) ### Encoding table encoding_table=codecs.charmap_build(decoding_table) rot_13.py 0000755 00000005064 15053241622 0006235 0 ustar 00 #! /usr/bin/python2.7 """ Python Character Mapping Codec for ROT13. See http://ucsub.colorado.edu/~kominek/rot13/ for details. Written by Marc-Andre Lemburg (mal@lemburg.com). """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.charmap_encode(input,self.errors,encoding_map)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.charmap_decode(input,self.errors,decoding_map)[0] class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='rot-13', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamwriter=StreamWriter, streamreader=StreamReader, _is_text_encoding=False, ) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0041: 0x004e, 0x0042: 0x004f, 0x0043: 0x0050, 0x0044: 0x0051, 0x0045: 0x0052, 0x0046: 0x0053, 0x0047: 0x0054, 0x0048: 0x0055, 0x0049: 0x0056, 0x004a: 0x0057, 0x004b: 0x0058, 0x004c: 0x0059, 0x004d: 0x005a, 0x004e: 0x0041, 0x004f: 0x0042, 0x0050: 0x0043, 0x0051: 0x0044, 0x0052: 0x0045, 0x0053: 0x0046, 0x0054: 0x0047, 0x0055: 0x0048, 0x0056: 0x0049, 0x0057: 0x004a, 0x0058: 0x004b, 0x0059: 0x004c, 0x005a: 0x004d, 0x0061: 0x006e, 0x0062: 0x006f, 0x0063: 0x0070, 0x0064: 0x0071, 0x0065: 0x0072, 0x0066: 0x0073, 0x0067: 0x0074, 0x0068: 0x0075, 0x0069: 0x0076, 0x006a: 0x0077, 0x006b: 0x0078, 0x006c: 0x0079, 0x006d: 0x007a, 0x006e: 0x0061, 0x006f: 0x0062, 0x0070: 0x0063, 0x0071: 0x0064, 0x0072: 0x0065, 0x0073: 0x0066, 0x0074: 0x0067, 0x0075: 0x0068, 0x0076: 0x0069, 0x0077: 0x006a, 0x0078: 0x006b, 0x0079: 0x006c, 0x007a: 0x006d, }) ### Encoding Map encoding_map = codecs.make_encoding_map(decoding_map) ### Filter API def rot13(infile, outfile): outfile.write(infile.read().encode('rot-13')) if __name__ == '__main__': import sys rot13(sys.stdin, sys.stdout) johab.py 0000644 00000001777 15053241622 0006215 0 ustar 00 # # johab.py: Python Unicode Codec for JOHAB # # Written by Hye-Shik Chang <perky@FreeBSD.org> # import _codecs_kr, codecs import _multibytecodec as mbc codec = _codecs_kr.getcodec('johab') class Codec(codecs.Codec): encode = codec.encode decode = codec.decode class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, codecs.IncrementalEncoder): codec = codec class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, codecs.IncrementalDecoder): codec = codec class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): codec = codec class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): codec = codec def getregentry(): return codecs.CodecInfo( name='johab', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) cp862.py 0000644 00000101532 15053241622 0005762 0 ustar 00 """ Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP862.TXT' with gencodec.py. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.charmap_encode(input,self.errors,encoding_map)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.charmap_decode(input,self.errors,decoding_table)[0] class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='cp862', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x05d0, # HEBREW LETTER ALEF 0x0081: 0x05d1, # HEBREW LETTER BET 0x0082: 0x05d2, # HEBREW LETTER GIMEL 0x0083: 0x05d3, # HEBREW LETTER DALET 0x0084: 0x05d4, # HEBREW LETTER HE 0x0085: 0x05d5, # HEBREW LETTER VAV 0x0086: 0x05d6, # HEBREW LETTER ZAYIN 0x0087: 0x05d7, # HEBREW LETTER HET 0x0088: 0x05d8, # HEBREW LETTER TET 0x0089: 0x05d9, # HEBREW LETTER YOD 0x008a: 0x05da, # HEBREW LETTER FINAL KAF 0x008b: 0x05db, # HEBREW LETTER KAF 0x008c: 0x05dc, # HEBREW LETTER LAMED 0x008d: 0x05dd, # HEBREW LETTER FINAL MEM 0x008e: 0x05de, # HEBREW LETTER MEM 0x008f: 0x05df, # HEBREW LETTER FINAL NUN 0x0090: 0x05e0, # HEBREW LETTER NUN 0x0091: 0x05e1, # HEBREW LETTER SAMEKH 0x0092: 0x05e2, # HEBREW LETTER AYIN 0x0093: 0x05e3, # HEBREW LETTER FINAL PE 0x0094: 0x05e4, # HEBREW LETTER PE 0x0095: 0x05e5, # HEBREW LETTER FINAL TSADI 0x0096: 0x05e6, # HEBREW LETTER TSADI 0x0097: 0x05e7, # HEBREW LETTER QOF 0x0098: 0x05e8, # HEBREW LETTER RESH 0x0099: 0x05e9, # HEBREW LETTER SHIN 0x009a: 0x05ea, # HEBREW LETTER TAV 0x009b: 0x00a2, # CENT SIGN 0x009c: 0x00a3, # POUND SIGN 0x009d: 0x00a5, # YEN SIGN 0x009e: 0x20a7, # PESETA SIGN 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR 0x00a8: 0x00bf, # INVERTED QUESTION MARK 0x00a9: 0x2310, # REVERSED NOT SIGN 0x00aa: 0x00ac, # NOT SIGN 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00b0: 0x2591, # LIGHT SHADE 0x00b1: 0x2592, # MEDIUM SHADE 0x00b2: 0x2593, # DARK SHADE 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT 0x00db: 0x2588, # FULL BLOCK 0x00dc: 0x2584, # LOWER HALF BLOCK 0x00dd: 0x258c, # LEFT HALF BLOCK 0x00de: 0x2590, # RIGHT HALF BLOCK 0x00df: 0x2580, # UPPER HALF BLOCK 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN) 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA 0x00e3: 0x03c0, # GREEK SMALL LETTER PI 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA 0x00e6: 0x00b5, # MICRO SIGN 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA 0x00ec: 0x221e, # INFINITY 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON 0x00ef: 0x2229, # INTERSECTION 0x00f0: 0x2261, # IDENTICAL TO 0x00f1: 0x00b1, # PLUS-MINUS SIGN 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO 0x00f4: 0x2320, # TOP HALF INTEGRAL 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL 0x00f6: 0x00f7, # DIVISION SIGN 0x00f7: 0x2248, # ALMOST EQUAL TO 0x00f8: 0x00b0, # DEGREE SIGN 0x00f9: 0x2219, # BULLET OPERATOR 0x00fa: 0x00b7, # MIDDLE DOT 0x00fb: 0x221a, # SQUARE ROOT 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N 0x00fd: 0x00b2, # SUPERSCRIPT TWO 0x00fe: 0x25a0, # BLACK SQUARE 0x00ff: 0x00a0, # NO-BREAK SPACE }) ### Decoding Table decoding_table = ( u'\x00' # 0x0000 -> NULL u'\x01' # 0x0001 -> START OF HEADING u'\x02' # 0x0002 -> START OF TEXT u'\x03' # 0x0003 -> END OF TEXT u'\x04' # 0x0004 -> END OF TRANSMISSION u'\x05' # 0x0005 -> ENQUIRY u'\x06' # 0x0006 -> ACKNOWLEDGE u'\x07' # 0x0007 -> BELL u'\x08' # 0x0008 -> BACKSPACE u'\t' # 0x0009 -> HORIZONTAL TABULATION u'\n' # 0x000a -> LINE FEED u'\x0b' # 0x000b -> VERTICAL TABULATION u'\x0c' # 0x000c -> FORM FEED u'\r' # 0x000d -> CARRIAGE RETURN u'\x0e' # 0x000e -> SHIFT OUT u'\x0f' # 0x000f -> SHIFT IN u'\x10' # 0x0010 -> DATA LINK ESCAPE u'\x11' # 0x0011 -> DEVICE CONTROL ONE u'\x12' # 0x0012 -> DEVICE CONTROL TWO u'\x13' # 0x0013 -> DEVICE CONTROL THREE u'\x14' # 0x0014 -> DEVICE CONTROL FOUR u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE u'\x16' # 0x0016 -> SYNCHRONOUS IDLE u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK u'\x18' # 0x0018 -> CANCEL u'\x19' # 0x0019 -> END OF MEDIUM u'\x1a' # 0x001a -> SUBSTITUTE u'\x1b' # 0x001b -> ESCAPE u'\x1c' # 0x001c -> FILE SEPARATOR u'\x1d' # 0x001d -> GROUP SEPARATOR u'\x1e' # 0x001e -> RECORD SEPARATOR u'\x1f' # 0x001f -> UNIT SEPARATOR u' ' # 0x0020 -> SPACE u'!' # 0x0021 -> EXCLAMATION MARK u'"' # 0x0022 -> QUOTATION MARK u'#' # 0x0023 -> NUMBER SIGN u'$' # 0x0024 -> DOLLAR SIGN u'%' # 0x0025 -> PERCENT SIGN u'&' # 0x0026 -> AMPERSAND u"'" # 0x0027 -> APOSTROPHE u'(' # 0x0028 -> LEFT PARENTHESIS u')' # 0x0029 -> RIGHT PARENTHESIS u'*' # 0x002a -> ASTERISK u'+' # 0x002b -> PLUS SIGN u',' # 0x002c -> COMMA u'-' # 0x002d -> HYPHEN-MINUS u'.' # 0x002e -> FULL STOP u'/' # 0x002f -> SOLIDUS u'0' # 0x0030 -> DIGIT ZERO u'1' # 0x0031 -> DIGIT ONE u'2' # 0x0032 -> DIGIT TWO u'3' # 0x0033 -> DIGIT THREE u'4' # 0x0034 -> DIGIT FOUR u'5' # 0x0035 -> DIGIT FIVE u'6' # 0x0036 -> DIGIT SIX u'7' # 0x0037 -> DIGIT SEVEN u'8' # 0x0038 -> DIGIT EIGHT u'9' # 0x0039 -> DIGIT NINE u':' # 0x003a -> COLON u';' # 0x003b -> SEMICOLON u'<' # 0x003c -> LESS-THAN SIGN u'=' # 0x003d -> EQUALS SIGN u'>' # 0x003e -> GREATER-THAN SIGN u'?' # 0x003f -> QUESTION MARK u'@' # 0x0040 -> COMMERCIAL AT u'A' # 0x0041 -> LATIN CAPITAL LETTER A u'B' # 0x0042 -> LATIN CAPITAL LETTER B u'C' # 0x0043 -> LATIN CAPITAL LETTER C u'D' # 0x0044 -> LATIN CAPITAL LETTER D u'E' # 0x0045 -> LATIN CAPITAL LETTER E u'F' # 0x0046 -> LATIN CAPITAL LETTER F u'G' # 0x0047 -> LATIN CAPITAL LETTER G u'H' # 0x0048 -> LATIN CAPITAL LETTER H u'I' # 0x0049 -> LATIN CAPITAL LETTER I u'J' # 0x004a -> LATIN CAPITAL LETTER J u'K' # 0x004b -> LATIN CAPITAL LETTER K u'L' # 0x004c -> LATIN CAPITAL LETTER L u'M' # 0x004d -> LATIN CAPITAL LETTER M u'N' # 0x004e -> LATIN CAPITAL LETTER N u'O' # 0x004f -> LATIN CAPITAL LETTER O u'P' # 0x0050 -> LATIN CAPITAL LETTER P u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q u'R' # 0x0052 -> LATIN CAPITAL LETTER R u'S' # 0x0053 -> LATIN CAPITAL LETTER S u'T' # 0x0054 -> LATIN CAPITAL LETTER T u'U' # 0x0055 -> LATIN CAPITAL LETTER U u'V' # 0x0056 -> LATIN CAPITAL LETTER V u'W' # 0x0057 -> LATIN CAPITAL LETTER W u'X' # 0x0058 -> LATIN CAPITAL LETTER X u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y u'Z' # 0x005a -> LATIN CAPITAL LETTER Z u'[' # 0x005b -> LEFT SQUARE BRACKET u'\\' # 0x005c -> REVERSE SOLIDUS u']' # 0x005d -> RIGHT SQUARE BRACKET u'^' # 0x005e -> CIRCUMFLEX ACCENT u'_' # 0x005f -> LOW LINE u'`' # 0x0060 -> GRAVE ACCENT u'a' # 0x0061 -> LATIN SMALL LETTER A u'b' # 0x0062 -> LATIN SMALL LETTER B u'c' # 0x0063 -> LATIN SMALL LETTER C u'd' # 0x0064 -> LATIN SMALL LETTER D u'e' # 0x0065 -> LATIN SMALL LETTER E u'f' # 0x0066 -> LATIN SMALL LETTER F u'g' # 0x0067 -> LATIN SMALL LETTER G u'h' # 0x0068 -> LATIN SMALL LETTER H u'i' # 0x0069 -> LATIN SMALL LETTER I u'j' # 0x006a -> LATIN SMALL LETTER J u'k' # 0x006b -> LATIN SMALL LETTER K u'l' # 0x006c -> LATIN SMALL LETTER L u'm' # 0x006d -> LATIN SMALL LETTER M u'n' # 0x006e -> LATIN SMALL LETTER N u'o' # 0x006f -> LATIN SMALL LETTER O u'p' # 0x0070 -> LATIN SMALL LETTER P u'q' # 0x0071 -> LATIN SMALL LETTER Q u'r' # 0x0072 -> LATIN SMALL LETTER R u's' # 0x0073 -> LATIN SMALL LETTER S u't' # 0x0074 -> LATIN SMALL LETTER T u'u' # 0x0075 -> LATIN SMALL LETTER U u'v' # 0x0076 -> LATIN SMALL LETTER V u'w' # 0x0077 -> LATIN SMALL LETTER W u'x' # 0x0078 -> LATIN SMALL LETTER X u'y' # 0x0079 -> LATIN SMALL LETTER Y u'z' # 0x007a -> LATIN SMALL LETTER Z u'{' # 0x007b -> LEFT CURLY BRACKET u'|' # 0x007c -> VERTICAL LINE u'}' # 0x007d -> RIGHT CURLY BRACKET u'~' # 0x007e -> TILDE u'\x7f' # 0x007f -> DELETE u'\u05d0' # 0x0080 -> HEBREW LETTER ALEF u'\u05d1' # 0x0081 -> HEBREW LETTER BET u'\u05d2' # 0x0082 -> HEBREW LETTER GIMEL u'\u05d3' # 0x0083 -> HEBREW LETTER DALET u'\u05d4' # 0x0084 -> HEBREW LETTER HE u'\u05d5' # 0x0085 -> HEBREW LETTER VAV u'\u05d6' # 0x0086 -> HEBREW LETTER ZAYIN u'\u05d7' # 0x0087 -> HEBREW LETTER HET u'\u05d8' # 0x0088 -> HEBREW LETTER TET u'\u05d9' # 0x0089 -> HEBREW LETTER YOD u'\u05da' # 0x008a -> HEBREW LETTER FINAL KAF u'\u05db' # 0x008b -> HEBREW LETTER KAF u'\u05dc' # 0x008c -> HEBREW LETTER LAMED u'\u05dd' # 0x008d -> HEBREW LETTER FINAL MEM u'\u05de' # 0x008e -> HEBREW LETTER MEM u'\u05df' # 0x008f -> HEBREW LETTER FINAL NUN u'\u05e0' # 0x0090 -> HEBREW LETTER NUN u'\u05e1' # 0x0091 -> HEBREW LETTER SAMEKH u'\u05e2' # 0x0092 -> HEBREW LETTER AYIN u'\u05e3' # 0x0093 -> HEBREW LETTER FINAL PE u'\u05e4' # 0x0094 -> HEBREW LETTER PE u'\u05e5' # 0x0095 -> HEBREW LETTER FINAL TSADI u'\u05e6' # 0x0096 -> HEBREW LETTER TSADI u'\u05e7' # 0x0097 -> HEBREW LETTER QOF u'\u05e8' # 0x0098 -> HEBREW LETTER RESH u'\u05e9' # 0x0099 -> HEBREW LETTER SHIN u'\u05ea' # 0x009a -> HEBREW LETTER TAV u'\xa2' # 0x009b -> CENT SIGN u'\xa3' # 0x009c -> POUND SIGN u'\xa5' # 0x009d -> YEN SIGN u'\u20a7' # 0x009e -> PESETA SIGN u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK u'\u2310' # 0x00a9 -> REVERSED NOT SIGN u'\xac' # 0x00aa -> NOT SIGN u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK u'\u2591' # 0x00b0 -> LIGHT SHADE u'\u2592' # 0x00b1 -> MEDIUM SHADE u'\u2593' # 0x00b2 -> DARK SHADE u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT u'\u2588' # 0x00db -> FULL BLOCK u'\u2584' # 0x00dc -> LOWER HALF BLOCK u'\u258c' # 0x00dd -> LEFT HALF BLOCK u'\u2590' # 0x00de -> RIGHT HALF BLOCK u'\u2580' # 0x00df -> UPPER HALF BLOCK u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S (GERMAN) u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA u'\xb5' # 0x00e6 -> MICRO SIGN u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA u'\u221e' # 0x00ec -> INFINITY u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON u'\u2229' # 0x00ef -> INTERSECTION u'\u2261' # 0x00f0 -> IDENTICAL TO u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL u'\xf7' # 0x00f6 -> DIVISION SIGN u'\u2248' # 0x00f7 -> ALMOST EQUAL TO u'\xb0' # 0x00f8 -> DEGREE SIGN u'\u2219' # 0x00f9 -> BULLET OPERATOR u'\xb7' # 0x00fa -> MIDDLE DOT u'\u221a' # 0x00fb -> SQUARE ROOT u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N u'\xb2' # 0x00fd -> SUPERSCRIPT TWO u'\u25a0' # 0x00fe -> BLACK SQUARE u'\xa0' # 0x00ff -> NO-BREAK SPACE ) ### Encoding Map encoding_map = { 0x0000: 0x0000, # NULL 0x0001: 0x0001, # START OF HEADING 0x0002: 0x0002, # START OF TEXT 0x0003: 0x0003, # END OF TEXT 0x0004: 0x0004, # END OF TRANSMISSION 0x0005: 0x0005, # ENQUIRY 0x0006: 0x0006, # ACKNOWLEDGE 0x0007: 0x0007, # BELL 0x0008: 0x0008, # BACKSPACE 0x0009: 0x0009, # HORIZONTAL TABULATION 0x000a: 0x000a, # LINE FEED 0x000b: 0x000b, # VERTICAL TABULATION 0x000c: 0x000c, # FORM FEED 0x000d: 0x000d, # CARRIAGE RETURN 0x000e: 0x000e, # SHIFT OUT 0x000f: 0x000f, # SHIFT IN 0x0010: 0x0010, # DATA LINK ESCAPE 0x0011: 0x0011, # DEVICE CONTROL ONE 0x0012: 0x0012, # DEVICE CONTROL TWO 0x0013: 0x0013, # DEVICE CONTROL THREE 0x0014: 0x0014, # DEVICE CONTROL FOUR 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE 0x0016: 0x0016, # SYNCHRONOUS IDLE 0x0017: 0x0017, # END OF TRANSMISSION BLOCK 0x0018: 0x0018, # CANCEL 0x0019: 0x0019, # END OF MEDIUM 0x001a: 0x001a, # SUBSTITUTE 0x001b: 0x001b, # ESCAPE 0x001c: 0x001c, # FILE SEPARATOR 0x001d: 0x001d, # GROUP SEPARATOR 0x001e: 0x001e, # RECORD SEPARATOR 0x001f: 0x001f, # UNIT SEPARATOR 0x0020: 0x0020, # SPACE 0x0021: 0x0021, # EXCLAMATION MARK 0x0022: 0x0022, # QUOTATION MARK 0x0023: 0x0023, # NUMBER SIGN 0x0024: 0x0024, # DOLLAR SIGN 0x0025: 0x0025, # PERCENT SIGN 0x0026: 0x0026, # AMPERSAND 0x0027: 0x0027, # APOSTROPHE 0x0028: 0x0028, # LEFT PARENTHESIS 0x0029: 0x0029, # RIGHT PARENTHESIS 0x002a: 0x002a, # ASTERISK 0x002b: 0x002b, # PLUS SIGN 0x002c: 0x002c, # COMMA 0x002d: 0x002d, # HYPHEN-MINUS 0x002e: 0x002e, # FULL STOP 0x002f: 0x002f, # SOLIDUS 0x0030: 0x0030, # DIGIT ZERO 0x0031: 0x0031, # DIGIT ONE 0x0032: 0x0032, # DIGIT TWO 0x0033: 0x0033, # DIGIT THREE 0x0034: 0x0034, # DIGIT FOUR 0x0035: 0x0035, # DIGIT FIVE 0x0036: 0x0036, # DIGIT SIX 0x0037: 0x0037, # DIGIT SEVEN 0x0038: 0x0038, # DIGIT EIGHT 0x0039: 0x0039, # DIGIT NINE 0x003a: 0x003a, # COLON 0x003b: 0x003b, # SEMICOLON 0x003c: 0x003c, # LESS-THAN SIGN 0x003d: 0x003d, # EQUALS SIGN 0x003e: 0x003e, # GREATER-THAN SIGN 0x003f: 0x003f, # QUESTION MARK 0x0040: 0x0040, # COMMERCIAL AT 0x0041: 0x0041, # LATIN CAPITAL LETTER A 0x0042: 0x0042, # LATIN CAPITAL LETTER B 0x0043: 0x0043, # LATIN CAPITAL LETTER C 0x0044: 0x0044, # LATIN CAPITAL LETTER D 0x0045: 0x0045, # LATIN CAPITAL LETTER E 0x0046: 0x0046, # LATIN CAPITAL LETTER F 0x0047: 0x0047, # LATIN CAPITAL LETTER G 0x0048: 0x0048, # LATIN CAPITAL LETTER H 0x0049: 0x0049, # LATIN CAPITAL LETTER I 0x004a: 0x004a, # LATIN CAPITAL LETTER J 0x004b: 0x004b, # LATIN CAPITAL LETTER K 0x004c: 0x004c, # LATIN CAPITAL LETTER L 0x004d: 0x004d, # LATIN CAPITAL LETTER M 0x004e: 0x004e, # LATIN CAPITAL LETTER N 0x004f: 0x004f, # LATIN CAPITAL LETTER O 0x0050: 0x0050, # LATIN CAPITAL LETTER P 0x0051: 0x0051, # LATIN CAPITAL LETTER Q 0x0052: 0x0052, # LATIN CAPITAL LETTER R 0x0053: 0x0053, # LATIN CAPITAL LETTER S 0x0054: 0x0054, # LATIN CAPITAL LETTER T 0x0055: 0x0055, # LATIN CAPITAL LETTER U 0x0056: 0x0056, # LATIN CAPITAL LETTER V 0x0057: 0x0057, # LATIN CAPITAL LETTER W 0x0058: 0x0058, # LATIN CAPITAL LETTER X 0x0059: 0x0059, # LATIN CAPITAL LETTER Y 0x005a: 0x005a, # LATIN CAPITAL LETTER Z 0x005b: 0x005b, # LEFT SQUARE BRACKET 0x005c: 0x005c, # REVERSE SOLIDUS 0x005d: 0x005d, # RIGHT SQUARE BRACKET 0x005e: 0x005e, # CIRCUMFLEX ACCENT 0x005f: 0x005f, # LOW LINE 0x0060: 0x0060, # GRAVE ACCENT 0x0061: 0x0061, # LATIN SMALL LETTER A 0x0062: 0x0062, # LATIN SMALL LETTER B 0x0063: 0x0063, # LATIN SMALL LETTER C 0x0064: 0x0064, # LATIN SMALL LETTER D 0x0065: 0x0065, # LATIN SMALL LETTER E 0x0066: 0x0066, # LATIN SMALL LETTER F 0x0067: 0x0067, # LATIN SMALL LETTER G 0x0068: 0x0068, # LATIN SMALL LETTER H 0x0069: 0x0069, # LATIN SMALL LETTER I 0x006a: 0x006a, # LATIN SMALL LETTER J 0x006b: 0x006b, # LATIN SMALL LETTER K 0x006c: 0x006c, # LATIN SMALL LETTER L 0x006d: 0x006d, # LATIN SMALL LETTER M 0x006e: 0x006e, # LATIN SMALL LETTER N 0x006f: 0x006f, # LATIN SMALL LETTER O 0x0070: 0x0070, # LATIN SMALL LETTER P 0x0071: 0x0071, # LATIN SMALL LETTER Q 0x0072: 0x0072, # LATIN SMALL LETTER R 0x0073: 0x0073, # LATIN SMALL LETTER S 0x0074: 0x0074, # LATIN SMALL LETTER T 0x0075: 0x0075, # LATIN SMALL LETTER U 0x0076: 0x0076, # LATIN SMALL LETTER V 0x0077: 0x0077, # LATIN SMALL LETTER W 0x0078: 0x0078, # LATIN SMALL LETTER X 0x0079: 0x0079, # LATIN SMALL LETTER Y 0x007a: 0x007a, # LATIN SMALL LETTER Z 0x007b: 0x007b, # LEFT CURLY BRACKET 0x007c: 0x007c, # VERTICAL LINE 0x007d: 0x007d, # RIGHT CURLY BRACKET 0x007e: 0x007e, # TILDE 0x007f: 0x007f, # DELETE 0x00a0: 0x00ff, # NO-BREAK SPACE 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK 0x00a2: 0x009b, # CENT SIGN 0x00a3: 0x009c, # POUND SIGN 0x00a5: 0x009d, # YEN SIGN 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00ac: 0x00aa, # NOT SIGN 0x00b0: 0x00f8, # DEGREE SIGN 0x00b1: 0x00f1, # PLUS-MINUS SIGN 0x00b2: 0x00fd, # SUPERSCRIPT TWO 0x00b5: 0x00e6, # MICRO SIGN 0x00b7: 0x00fa, # MIDDLE DOT 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF 0x00bf: 0x00a8, # INVERTED QUESTION MARK 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S (GERMAN) 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE 0x00f7: 0x00f6, # DIVISION SIGN 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON 0x03c0: 0x00e3, # GREEK SMALL LETTER PI 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI 0x05d0: 0x0080, # HEBREW LETTER ALEF 0x05d1: 0x0081, # HEBREW LETTER BET 0x05d2: 0x0082, # HEBREW LETTER GIMEL 0x05d3: 0x0083, # HEBREW LETTER DALET 0x05d4: 0x0084, # HEBREW LETTER HE 0x05d5: 0x0085, # HEBREW LETTER VAV 0x05d6: 0x0086, # HEBREW LETTER ZAYIN 0x05d7: 0x0087, # HEBREW LETTER HET 0x05d8: 0x0088, # HEBREW LETTER TET 0x05d9: 0x0089, # HEBREW LETTER YOD 0x05da: 0x008a, # HEBREW LETTER FINAL KAF 0x05db: 0x008b, # HEBREW LETTER KAF 0x05dc: 0x008c, # HEBREW LETTER LAMED 0x05dd: 0x008d, # HEBREW LETTER FINAL MEM 0x05de: 0x008e, # HEBREW LETTER MEM 0x05df: 0x008f, # HEBREW LETTER FINAL NUN 0x05e0: 0x0090, # HEBREW LETTER NUN 0x05e1: 0x0091, # HEBREW LETTER SAMEKH 0x05e2: 0x0092, # HEBREW LETTER AYIN 0x05e3: 0x0093, # HEBREW LETTER FINAL PE 0x05e4: 0x0094, # HEBREW LETTER PE 0x05e5: 0x0095, # HEBREW LETTER FINAL TSADI 0x05e6: 0x0096, # HEBREW LETTER TSADI 0x05e7: 0x0097, # HEBREW LETTER QOF 0x05e8: 0x0098, # HEBREW LETTER RESH 0x05e9: 0x0099, # HEBREW LETTER SHIN 0x05ea: 0x009a, # HEBREW LETTER TAV 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N 0x20a7: 0x009e, # PESETA SIGN 0x2219: 0x00f9, # BULLET OPERATOR 0x221a: 0x00fb, # SQUARE ROOT 0x221e: 0x00ec, # INFINITY 0x2229: 0x00ef, # INTERSECTION 0x2248: 0x00f7, # ALMOST EQUAL TO 0x2261: 0x00f0, # IDENTICAL TO 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO 0x2310: 0x00a9, # REVERSED NOT SIGN 0x2320: 0x00f4, # TOP HALF INTEGRAL 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL 0x2580: 0x00df, # UPPER HALF BLOCK 0x2584: 0x00dc, # LOWER HALF BLOCK 0x2588: 0x00db, # FULL BLOCK 0x258c: 0x00dd, # LEFT HALF BLOCK 0x2590: 0x00de, # RIGHT HALF BLOCK 0x2591: 0x00b0, # LIGHT SHADE 0x2592: 0x00b1, # MEDIUM SHADE 0x2593: 0x00b2, # DARK SHADE 0x25a0: 0x00fe, # BLACK SQUARE } aliases.py 0000644 00000035000 15053241622 0006535 0 ustar 00 """ Encoding Aliases Support This module is used by the encodings package search function to map encodings names to module names. Note that the search function normalizes the encoding names before doing the lookup, so the mapping will have to map normalized encoding names to module names. Contents: The following aliases dictionary contains mappings of all IANA character set names for which the Python core library provides codecs. In addition to these, a few Python specific codec aliases have also been added. """ aliases = { # Please keep this list sorted alphabetically by value ! # ascii codec '646' : 'ascii', 'ansi_x3.4_1968' : 'ascii', 'ansi_x3_4_1968' : 'ascii', # some email headers use this non-standard name 'ansi_x3.4_1986' : 'ascii', 'cp367' : 'ascii', 'csascii' : 'ascii', 'ibm367' : 'ascii', 'iso646_us' : 'ascii', 'iso_646.irv_1991' : 'ascii', 'iso_ir_6' : 'ascii', 'us' : 'ascii', 'us_ascii' : 'ascii', # base64_codec codec 'base64' : 'base64_codec', 'base_64' : 'base64_codec', # big5 codec 'big5_tw' : 'big5', 'csbig5' : 'big5', # big5hkscs codec 'big5_hkscs' : 'big5hkscs', 'hkscs' : 'big5hkscs', # bz2_codec codec 'bz2' : 'bz2_codec', # cp037 codec '037' : 'cp037', 'csibm037' : 'cp037', 'ebcdic_cp_ca' : 'cp037', 'ebcdic_cp_nl' : 'cp037', 'ebcdic_cp_us' : 'cp037', 'ebcdic_cp_wt' : 'cp037', 'ibm037' : 'cp037', 'ibm039' : 'cp037', # cp1026 codec '1026' : 'cp1026', 'csibm1026' : 'cp1026', 'ibm1026' : 'cp1026', # cp1140 codec '1140' : 'cp1140', 'ibm1140' : 'cp1140', # cp1250 codec '1250' : 'cp1250', 'windows_1250' : 'cp1250', # cp1251 codec '1251' : 'cp1251', 'windows_1251' : 'cp1251', # cp1252 codec '1252' : 'cp1252', 'windows_1252' : 'cp1252', # cp1253 codec '1253' : 'cp1253', 'windows_1253' : 'cp1253', # cp1254 codec '1254' : 'cp1254', 'windows_1254' : 'cp1254', # cp1255 codec '1255' : 'cp1255', 'windows_1255' : 'cp1255', # cp1256 codec '1256' : 'cp1256', 'windows_1256' : 'cp1256', # cp1257 codec '1257' : 'cp1257', 'windows_1257' : 'cp1257', # cp1258 codec '1258' : 'cp1258', 'windows_1258' : 'cp1258', # cp424 codec '424' : 'cp424', 'csibm424' : 'cp424', 'ebcdic_cp_he' : 'cp424', 'ibm424' : 'cp424', # cp437 codec '437' : 'cp437', 'cspc8codepage437' : 'cp437', 'ibm437' : 'cp437', # cp500 codec '500' : 'cp500', 'csibm500' : 'cp500', 'ebcdic_cp_be' : 'cp500', 'ebcdic_cp_ch' : 'cp500', 'ibm500' : 'cp500', # cp775 codec '775' : 'cp775', 'cspc775baltic' : 'cp775', 'ibm775' : 'cp775', # cp850 codec '850' : 'cp850', 'cspc850multilingual' : 'cp850', 'ibm850' : 'cp850', # cp852 codec '852' : 'cp852', 'cspcp852' : 'cp852', 'ibm852' : 'cp852', # cp855 codec '855' : 'cp855', 'csibm855' : 'cp855', 'ibm855' : 'cp855', # cp857 codec '857' : 'cp857', 'csibm857' : 'cp857', 'ibm857' : 'cp857', # cp858 codec '858' : 'cp858', 'csibm858' : 'cp858', 'ibm858' : 'cp858', # cp860 codec '860' : 'cp860', 'csibm860' : 'cp860', 'ibm860' : 'cp860', # cp861 codec '861' : 'cp861', 'cp_is' : 'cp861', 'csibm861' : 'cp861', 'ibm861' : 'cp861', # cp862 codec '862' : 'cp862', 'cspc862latinhebrew' : 'cp862', 'ibm862' : 'cp862', # cp863 codec '863' : 'cp863', 'csibm863' : 'cp863', 'ibm863' : 'cp863', # cp864 codec '864' : 'cp864', 'csibm864' : 'cp864', 'ibm864' : 'cp864', # cp865 codec '865' : 'cp865', 'csibm865' : 'cp865', 'ibm865' : 'cp865', # cp866 codec '866' : 'cp866', 'csibm866' : 'cp866', 'ibm866' : 'cp866', # cp869 codec '869' : 'cp869', 'cp_gr' : 'cp869', 'csibm869' : 'cp869', 'ibm869' : 'cp869', # cp932 codec '932' : 'cp932', 'ms932' : 'cp932', 'mskanji' : 'cp932', 'ms_kanji' : 'cp932', # cp949 codec '949' : 'cp949', 'ms949' : 'cp949', 'uhc' : 'cp949', # cp950 codec '950' : 'cp950', 'ms950' : 'cp950', # euc_jis_2004 codec 'jisx0213' : 'euc_jis_2004', 'eucjis2004' : 'euc_jis_2004', 'euc_jis2004' : 'euc_jis_2004', # euc_jisx0213 codec 'eucjisx0213' : 'euc_jisx0213', # euc_jp codec 'eucjp' : 'euc_jp', 'ujis' : 'euc_jp', 'u_jis' : 'euc_jp', # euc_kr codec 'euckr' : 'euc_kr', 'korean' : 'euc_kr', 'ksc5601' : 'euc_kr', 'ks_c_5601' : 'euc_kr', 'ks_c_5601_1987' : 'euc_kr', 'ksx1001' : 'euc_kr', 'ks_x_1001' : 'euc_kr', # gb18030 codec 'gb18030_2000' : 'gb18030', # gb2312 codec 'chinese' : 'gb2312', 'csiso58gb231280' : 'gb2312', 'euc_cn' : 'gb2312', 'euccn' : 'gb2312', 'eucgb2312_cn' : 'gb2312', 'gb2312_1980' : 'gb2312', 'gb2312_80' : 'gb2312', 'iso_ir_58' : 'gb2312', # gbk codec '936' : 'gbk', 'cp936' : 'gbk', 'ms936' : 'gbk', # hex_codec codec 'hex' : 'hex_codec', # hp_roman8 codec 'roman8' : 'hp_roman8', 'r8' : 'hp_roman8', 'csHPRoman8' : 'hp_roman8', # hz codec 'hzgb' : 'hz', 'hz_gb' : 'hz', 'hz_gb_2312' : 'hz', # iso2022_jp codec 'csiso2022jp' : 'iso2022_jp', 'iso2022jp' : 'iso2022_jp', 'iso_2022_jp' : 'iso2022_jp', # iso2022_jp_1 codec 'iso2022jp_1' : 'iso2022_jp_1', 'iso_2022_jp_1' : 'iso2022_jp_1', # iso2022_jp_2 codec 'iso2022jp_2' : 'iso2022_jp_2', 'iso_2022_jp_2' : 'iso2022_jp_2', # iso2022_jp_2004 codec 'iso_2022_jp_2004' : 'iso2022_jp_2004', 'iso2022jp_2004' : 'iso2022_jp_2004', # iso2022_jp_3 codec 'iso2022jp_3' : 'iso2022_jp_3', 'iso_2022_jp_3' : 'iso2022_jp_3', # iso2022_jp_ext codec 'iso2022jp_ext' : 'iso2022_jp_ext', 'iso_2022_jp_ext' : 'iso2022_jp_ext', # iso2022_kr codec 'csiso2022kr' : 'iso2022_kr', 'iso2022kr' : 'iso2022_kr', 'iso_2022_kr' : 'iso2022_kr', # iso8859_10 codec 'csisolatin6' : 'iso8859_10', 'iso_8859_10' : 'iso8859_10', 'iso_8859_10_1992' : 'iso8859_10', 'iso_ir_157' : 'iso8859_10', 'l6' : 'iso8859_10', 'latin6' : 'iso8859_10', # iso8859_11 codec 'thai' : 'iso8859_11', 'iso_8859_11' : 'iso8859_11', 'iso_8859_11_2001' : 'iso8859_11', # iso8859_13 codec 'iso_8859_13' : 'iso8859_13', 'l7' : 'iso8859_13', 'latin7' : 'iso8859_13', # iso8859_14 codec 'iso_8859_14' : 'iso8859_14', 'iso_8859_14_1998' : 'iso8859_14', 'iso_celtic' : 'iso8859_14', 'iso_ir_199' : 'iso8859_14', 'l8' : 'iso8859_14', 'latin8' : 'iso8859_14', # iso8859_15 codec 'iso_8859_15' : 'iso8859_15', 'l9' : 'iso8859_15', 'latin9' : 'iso8859_15', # iso8859_16 codec 'iso_8859_16' : 'iso8859_16', 'iso_8859_16_2001' : 'iso8859_16', 'iso_ir_226' : 'iso8859_16', 'l10' : 'iso8859_16', 'latin10' : 'iso8859_16', # iso8859_2 codec 'csisolatin2' : 'iso8859_2', 'iso_8859_2' : 'iso8859_2', 'iso_8859_2_1987' : 'iso8859_2', 'iso_ir_101' : 'iso8859_2', 'l2' : 'iso8859_2', 'latin2' : 'iso8859_2', # iso8859_3 codec 'csisolatin3' : 'iso8859_3', 'iso_8859_3' : 'iso8859_3', 'iso_8859_3_1988' : 'iso8859_3', 'iso_ir_109' : 'iso8859_3', 'l3' : 'iso8859_3', 'latin3' : 'iso8859_3', # iso8859_4 codec 'csisolatin4' : 'iso8859_4', 'iso_8859_4' : 'iso8859_4', 'iso_8859_4_1988' : 'iso8859_4', 'iso_ir_110' : 'iso8859_4', 'l4' : 'iso8859_4', 'latin4' : 'iso8859_4', # iso8859_5 codec 'csisolatincyrillic' : 'iso8859_5', 'cyrillic' : 'iso8859_5', 'iso_8859_5' : 'iso8859_5', 'iso_8859_5_1988' : 'iso8859_5', 'iso_ir_144' : 'iso8859_5', # iso8859_6 codec 'arabic' : 'iso8859_6', 'asmo_708' : 'iso8859_6', 'csisolatinarabic' : 'iso8859_6', 'ecma_114' : 'iso8859_6', 'iso_8859_6' : 'iso8859_6', 'iso_8859_6_1987' : 'iso8859_6', 'iso_ir_127' : 'iso8859_6', # iso8859_7 codec 'csisolatingreek' : 'iso8859_7', 'ecma_118' : 'iso8859_7', 'elot_928' : 'iso8859_7', 'greek' : 'iso8859_7', 'greek8' : 'iso8859_7', 'iso_8859_7' : 'iso8859_7', 'iso_8859_7_1987' : 'iso8859_7', 'iso_ir_126' : 'iso8859_7', # iso8859_8 codec 'csisolatinhebrew' : 'iso8859_8', 'hebrew' : 'iso8859_8', 'iso_8859_8' : 'iso8859_8', 'iso_8859_8_1988' : 'iso8859_8', 'iso_ir_138' : 'iso8859_8', # iso8859_9 codec 'csisolatin5' : 'iso8859_9', 'iso_8859_9' : 'iso8859_9', 'iso_8859_9_1989' : 'iso8859_9', 'iso_ir_148' : 'iso8859_9', 'l5' : 'iso8859_9', 'latin5' : 'iso8859_9', # johab codec 'cp1361' : 'johab', 'ms1361' : 'johab', # koi8_r codec 'cskoi8r' : 'koi8_r', # latin_1 codec # # Note that the latin_1 codec is implemented internally in C and a # lot faster than the charmap codec iso8859_1 which uses the same # encoding. This is why we discourage the use of the iso8859_1 # codec and alias it to latin_1 instead. # '8859' : 'latin_1', 'cp819' : 'latin_1', 'csisolatin1' : 'latin_1', 'ibm819' : 'latin_1', 'iso8859' : 'latin_1', 'iso8859_1' : 'latin_1', 'iso_8859_1' : 'latin_1', 'iso_8859_1_1987' : 'latin_1', 'iso_ir_100' : 'latin_1', 'l1' : 'latin_1', 'latin' : 'latin_1', 'latin1' : 'latin_1', # mac_cyrillic codec 'maccyrillic' : 'mac_cyrillic', # mac_greek codec 'macgreek' : 'mac_greek', # mac_iceland codec 'maciceland' : 'mac_iceland', # mac_latin2 codec 'maccentraleurope' : 'mac_latin2', 'maclatin2' : 'mac_latin2', # mac_roman codec 'macroman' : 'mac_roman', # mac_turkish codec 'macturkish' : 'mac_turkish', # mbcs codec 'dbcs' : 'mbcs', # ptcp154 codec 'csptcp154' : 'ptcp154', 'pt154' : 'ptcp154', 'cp154' : 'ptcp154', 'cyrillic_asian' : 'ptcp154', # quopri_codec codec 'quopri' : 'quopri_codec', 'quoted_printable' : 'quopri_codec', 'quotedprintable' : 'quopri_codec', # rot_13 codec 'rot13' : 'rot_13', # shift_jis codec 'csshiftjis' : 'shift_jis', 'shiftjis' : 'shift_jis', 'sjis' : 'shift_jis', 's_jis' : 'shift_jis', # shift_jis_2004 codec 'shiftjis2004' : 'shift_jis_2004', 'sjis_2004' : 'shift_jis_2004', 's_jis_2004' : 'shift_jis_2004', # shift_jisx0213 codec 'shiftjisx0213' : 'shift_jisx0213', 'sjisx0213' : 'shift_jisx0213', 's_jisx0213' : 'shift_jisx0213', # tactis codec 'tis260' : 'tactis', # tis_620 codec 'tis620' : 'tis_620', 'tis_620_0' : 'tis_620', 'tis_620_2529_0' : 'tis_620', 'tis_620_2529_1' : 'tis_620', 'iso_ir_166' : 'tis_620', # utf_16 codec 'u16' : 'utf_16', 'utf16' : 'utf_16', # utf_16_be codec 'unicodebigunmarked' : 'utf_16_be', 'utf_16be' : 'utf_16_be', # utf_16_le codec 'unicodelittleunmarked' : 'utf_16_le', 'utf_16le' : 'utf_16_le', # utf_32 codec 'u32' : 'utf_32', 'utf32' : 'utf_32', # utf_32_be codec 'utf_32be' : 'utf_32_be', # utf_32_le codec 'utf_32le' : 'utf_32_le', # utf_7 codec 'u7' : 'utf_7', 'utf7' : 'utf_7', 'unicode_1_1_utf_7' : 'utf_7', # utf_8 codec 'u8' : 'utf_8', 'utf' : 'utf_8', 'utf8' : 'utf_8', 'utf8_ucs2' : 'utf_8', 'utf8_ucs4' : 'utf_8', # uu_codec codec 'uu' : 'uu_codec', # zlib_codec codec 'zip' : 'zlib_codec', 'zlib' : 'zlib_codec', } iso8859_1.py 0000644 00000032170 15053241622 0006471 0 ustar 00 """ Python Character Mapping Codec iso8859_1 generated from 'MAPPINGS/ISO8859/8859-1.TXT' with gencodec.py. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.charmap_decode(input,self.errors,decoding_table)[0] class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='iso8859-1', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) ### Decoding Table decoding_table = ( u'\x00' # 0x00 -> NULL u'\x01' # 0x01 -> START OF HEADING u'\x02' # 0x02 -> START OF TEXT u'\x03' # 0x03 -> END OF TEXT u'\x04' # 0x04 -> END OF TRANSMISSION u'\x05' # 0x05 -> ENQUIRY u'\x06' # 0x06 -> ACKNOWLEDGE u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION u'\n' # 0x0A -> LINE FEED u'\x0b' # 0x0B -> VERTICAL TABULATION u'\x0c' # 0x0C -> FORM FEED u'\r' # 0x0D -> CARRIAGE RETURN u'\x0e' # 0x0E -> SHIFT OUT u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO u'\x13' # 0x13 -> DEVICE CONTROL THREE u'\x14' # 0x14 -> DEVICE CONTROL FOUR u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE u'\x16' # 0x16 -> SYNCHRONOUS IDLE u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM u'\x1a' # 0x1A -> SUBSTITUTE u'\x1b' # 0x1B -> ESCAPE u'\x1c' # 0x1C -> FILE SEPARATOR u'\x1d' # 0x1D -> GROUP SEPARATOR u'\x1e' # 0x1E -> RECORD SEPARATOR u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK u'#' # 0x23 -> NUMBER SIGN u'$' # 0x24 -> DOLLAR SIGN u'%' # 0x25 -> PERCENT SIGN u'&' # 0x26 -> AMPERSAND u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS u'*' # 0x2A -> ASTERISK u'+' # 0x2B -> PLUS SIGN u',' # 0x2C -> COMMA u'-' # 0x2D -> HYPHEN-MINUS u'.' # 0x2E -> FULL STOP u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO u'3' # 0x33 -> DIGIT THREE u'4' # 0x34 -> DIGIT FOUR u'5' # 0x35 -> DIGIT FIVE u'6' # 0x36 -> DIGIT SIX u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE u':' # 0x3A -> COLON u';' # 0x3B -> SEMICOLON u'<' # 0x3C -> LESS-THAN SIGN u'=' # 0x3D -> EQUALS SIGN u'>' # 0x3E -> GREATER-THAN SIGN u'?' # 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B u'C' # 0x43 -> LATIN CAPITAL LETTER C u'D' # 0x44 -> LATIN CAPITAL LETTER D u'E' # 0x45 -> LATIN CAPITAL LETTER E u'F' # 0x46 -> LATIN CAPITAL LETTER F u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I u'J' # 0x4A -> LATIN CAPITAL LETTER J u'K' # 0x4B -> LATIN CAPITAL LETTER K u'L' # 0x4C -> LATIN CAPITAL LETTER L u'M' # 0x4D -> LATIN CAPITAL LETTER M u'N' # 0x4E -> LATIN CAPITAL LETTER N u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R u'S' # 0x53 -> LATIN CAPITAL LETTER S u'T' # 0x54 -> LATIN CAPITAL LETTER T u'U' # 0x55 -> LATIN CAPITAL LETTER U u'V' # 0x56 -> LATIN CAPITAL LETTER V u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y u'Z' # 0x5A -> LATIN CAPITAL LETTER Z u'[' # 0x5B -> LEFT SQUARE BRACKET u'\\' # 0x5C -> REVERSE SOLIDUS u']' # 0x5D -> RIGHT SQUARE BRACKET u'^' # 0x5E -> CIRCUMFLEX ACCENT u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B u'c' # 0x63 -> LATIN SMALL LETTER C u'd' # 0x64 -> LATIN SMALL LETTER D u'e' # 0x65 -> LATIN SMALL LETTER E u'f' # 0x66 -> LATIN SMALL LETTER F u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I u'j' # 0x6A -> LATIN SMALL LETTER J u'k' # 0x6B -> LATIN SMALL LETTER K u'l' # 0x6C -> LATIN SMALL LETTER L u'm' # 0x6D -> LATIN SMALL LETTER M u'n' # 0x6E -> LATIN SMALL LETTER N u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R u's' # 0x73 -> LATIN SMALL LETTER S u't' # 0x74 -> LATIN SMALL LETTER T u'u' # 0x75 -> LATIN SMALL LETTER U u'v' # 0x76 -> LATIN SMALL LETTER V u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y u'z' # 0x7A -> LATIN SMALL LETTER Z u'{' # 0x7B -> LEFT CURLY BRACKET u'|' # 0x7C -> VERTICAL LINE u'}' # 0x7D -> RIGHT CURLY BRACKET u'~' # 0x7E -> TILDE u'\x7f' # 0x7F -> DELETE u'\x80' # 0x80 -> <control> u'\x81' # 0x81 -> <control> u'\x82' # 0x82 -> <control> u'\x83' # 0x83 -> <control> u'\x84' # 0x84 -> <control> u'\x85' # 0x85 -> <control> u'\x86' # 0x86 -> <control> u'\x87' # 0x87 -> <control> u'\x88' # 0x88 -> <control> u'\x89' # 0x89 -> <control> u'\x8a' # 0x8A -> <control> u'\x8b' # 0x8B -> <control> u'\x8c' # 0x8C -> <control> u'\x8d' # 0x8D -> <control> u'\x8e' # 0x8E -> <control> u'\x8f' # 0x8F -> <control> u'\x90' # 0x90 -> <control> u'\x91' # 0x91 -> <control> u'\x92' # 0x92 -> <control> u'\x93' # 0x93 -> <control> u'\x94' # 0x94 -> <control> u'\x95' # 0x95 -> <control> u'\x96' # 0x96 -> <control> u'\x97' # 0x97 -> <control> u'\x98' # 0x98 -> <control> u'\x99' # 0x99 -> <control> u'\x9a' # 0x9A -> <control> u'\x9b' # 0x9B -> <control> u'\x9c' # 0x9C -> <control> u'\x9d' # 0x9D -> <control> u'\x9e' # 0x9E -> <control> u'\x9f' # 0x9F -> <control> u'\xa0' # 0xA0 -> NO-BREAK SPACE u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK u'\xa2' # 0xA2 -> CENT SIGN u'\xa3' # 0xA3 -> POUND SIGN u'\xa4' # 0xA4 -> CURRENCY SIGN u'\xa5' # 0xA5 -> YEN SIGN u'\xa6' # 0xA6 -> BROKEN BAR u'\xa7' # 0xA7 -> SECTION SIGN u'\xa8' # 0xA8 -> DIAERESIS u'\xa9' # 0xA9 -> COPYRIGHT SIGN u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK u'\xac' # 0xAC -> NOT SIGN u'\xad' # 0xAD -> SOFT HYPHEN u'\xae' # 0xAE -> REGISTERED SIGN u'\xaf' # 0xAF -> MACRON u'\xb0' # 0xB0 -> DEGREE SIGN u'\xb1' # 0xB1 -> PLUS-MINUS SIGN u'\xb2' # 0xB2 -> SUPERSCRIPT TWO u'\xb3' # 0xB3 -> SUPERSCRIPT THREE u'\xb4' # 0xB4 -> ACUTE ACCENT u'\xb5' # 0xB5 -> MICRO SIGN u'\xb6' # 0xB6 -> PILCROW SIGN u'\xb7' # 0xB7 -> MIDDLE DOT u'\xb8' # 0xB8 -> CEDILLA u'\xb9' # 0xB9 -> SUPERSCRIPT ONE u'\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS u'\xbf' # 0xBF -> INVERTED QUESTION MARK u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS u'\xd0' # 0xD0 -> LATIN CAPITAL LETTER ETH (Icelandic) u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS u'\xd7' # 0xD7 -> MULTIPLICATION SIGN u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE u'\xde' # 0xDE -> LATIN CAPITAL LETTER THORN (Icelandic) u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S (German) u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS u'\xf0' # 0xF0 -> LATIN SMALL LETTER ETH (Icelandic) u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS u'\xf7' # 0xF7 -> DIVISION SIGN u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE u'\xfe' # 0xFE -> LATIN SMALL LETTER THORN (Icelandic) u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS ) ### Encoding table encoding_table=codecs.charmap_build(decoding_table) cp1250.py 0000644 00000033166 15053241622 0006041 0 ustar 00 """ Python Character Mapping Codec cp1250 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1250.TXT' with gencodec.py. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.charmap_decode(input,self.errors,decoding_table)[0] class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='cp1250', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) ### Decoding Table decoding_table = ( u'\x00' # 0x00 -> NULL u'\x01' # 0x01 -> START OF HEADING u'\x02' # 0x02 -> START OF TEXT u'\x03' # 0x03 -> END OF TEXT u'\x04' # 0x04 -> END OF TRANSMISSION u'\x05' # 0x05 -> ENQUIRY u'\x06' # 0x06 -> ACKNOWLEDGE u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION u'\n' # 0x0A -> LINE FEED u'\x0b' # 0x0B -> VERTICAL TABULATION u'\x0c' # 0x0C -> FORM FEED u'\r' # 0x0D -> CARRIAGE RETURN u'\x0e' # 0x0E -> SHIFT OUT u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO u'\x13' # 0x13 -> DEVICE CONTROL THREE u'\x14' # 0x14 -> DEVICE CONTROL FOUR u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE u'\x16' # 0x16 -> SYNCHRONOUS IDLE u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM u'\x1a' # 0x1A -> SUBSTITUTE u'\x1b' # 0x1B -> ESCAPE u'\x1c' # 0x1C -> FILE SEPARATOR u'\x1d' # 0x1D -> GROUP SEPARATOR u'\x1e' # 0x1E -> RECORD SEPARATOR u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK u'#' # 0x23 -> NUMBER SIGN u'$' # 0x24 -> DOLLAR SIGN u'%' # 0x25 -> PERCENT SIGN u'&' # 0x26 -> AMPERSAND u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS u'*' # 0x2A -> ASTERISK u'+' # 0x2B -> PLUS SIGN u',' # 0x2C -> COMMA u'-' # 0x2D -> HYPHEN-MINUS u'.' # 0x2E -> FULL STOP u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO u'3' # 0x33 -> DIGIT THREE u'4' # 0x34 -> DIGIT FOUR u'5' # 0x35 -> DIGIT FIVE u'6' # 0x36 -> DIGIT SIX u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE u':' # 0x3A -> COLON u';' # 0x3B -> SEMICOLON u'<' # 0x3C -> LESS-THAN SIGN u'=' # 0x3D -> EQUALS SIGN u'>' # 0x3E -> GREATER-THAN SIGN u'?' # 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B u'C' # 0x43 -> LATIN CAPITAL LETTER C u'D' # 0x44 -> LATIN CAPITAL LETTER D u'E' # 0x45 -> LATIN CAPITAL LETTER E u'F' # 0x46 -> LATIN CAPITAL LETTER F u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I u'J' # 0x4A -> LATIN CAPITAL LETTER J u'K' # 0x4B -> LATIN CAPITAL LETTER K u'L' # 0x4C -> LATIN CAPITAL LETTER L u'M' # 0x4D -> LATIN CAPITAL LETTER M u'N' # 0x4E -> LATIN CAPITAL LETTER N u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R u'S' # 0x53 -> LATIN CAPITAL LETTER S u'T' # 0x54 -> LATIN CAPITAL LETTER T u'U' # 0x55 -> LATIN CAPITAL LETTER U u'V' # 0x56 -> LATIN CAPITAL LETTER V u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y u'Z' # 0x5A -> LATIN CAPITAL LETTER Z u'[' # 0x5B -> LEFT SQUARE BRACKET u'\\' # 0x5C -> REVERSE SOLIDUS u']' # 0x5D -> RIGHT SQUARE BRACKET u'^' # 0x5E -> CIRCUMFLEX ACCENT u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B u'c' # 0x63 -> LATIN SMALL LETTER C u'd' # 0x64 -> LATIN SMALL LETTER D u'e' # 0x65 -> LATIN SMALL LETTER E u'f' # 0x66 -> LATIN SMALL LETTER F u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I u'j' # 0x6A -> LATIN SMALL LETTER J u'k' # 0x6B -> LATIN SMALL LETTER K u'l' # 0x6C -> LATIN SMALL LETTER L u'm' # 0x6D -> LATIN SMALL LETTER M u'n' # 0x6E -> LATIN SMALL LETTER N u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R u's' # 0x73 -> LATIN SMALL LETTER S u't' # 0x74 -> LATIN SMALL LETTER T u'u' # 0x75 -> LATIN SMALL LETTER U u'v' # 0x76 -> LATIN SMALL LETTER V u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y u'z' # 0x7A -> LATIN SMALL LETTER Z u'{' # 0x7B -> LEFT CURLY BRACKET u'|' # 0x7C -> VERTICAL LINE u'}' # 0x7D -> RIGHT CURLY BRACKET u'~' # 0x7E -> TILDE u'\x7f' # 0x7F -> DELETE u'\u20ac' # 0x80 -> EURO SIGN u'\ufffe' # 0x81 -> UNDEFINED u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK u'\ufffe' # 0x83 -> UNDEFINED u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS u'\u2020' # 0x86 -> DAGGER u'\u2021' # 0x87 -> DOUBLE DAGGER u'\ufffe' # 0x88 -> UNDEFINED u'\u2030' # 0x89 -> PER MILLE SIGN u'\u0160' # 0x8A -> LATIN CAPITAL LETTER S WITH CARON u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK u'\u015a' # 0x8C -> LATIN CAPITAL LETTER S WITH ACUTE u'\u0164' # 0x8D -> LATIN CAPITAL LETTER T WITH CARON u'\u017d' # 0x8E -> LATIN CAPITAL LETTER Z WITH CARON u'\u0179' # 0x8F -> LATIN CAPITAL LETTER Z WITH ACUTE u'\ufffe' # 0x90 -> UNDEFINED u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK u'\u2022' # 0x95 -> BULLET u'\u2013' # 0x96 -> EN DASH u'\u2014' # 0x97 -> EM DASH u'\ufffe' # 0x98 -> UNDEFINED u'\u2122' # 0x99 -> TRADE MARK SIGN u'\u0161' # 0x9A -> LATIN SMALL LETTER S WITH CARON u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK u'\u015b' # 0x9C -> LATIN SMALL LETTER S WITH ACUTE u'\u0165' # 0x9D -> LATIN SMALL LETTER T WITH CARON u'\u017e' # 0x9E -> LATIN SMALL LETTER Z WITH CARON u'\u017a' # 0x9F -> LATIN SMALL LETTER Z WITH ACUTE u'\xa0' # 0xA0 -> NO-BREAK SPACE u'\u02c7' # 0xA1 -> CARON u'\u02d8' # 0xA2 -> BREVE u'\u0141' # 0xA3 -> LATIN CAPITAL LETTER L WITH STROKE u'\xa4' # 0xA4 -> CURRENCY SIGN u'\u0104' # 0xA5 -> LATIN CAPITAL LETTER A WITH OGONEK u'\xa6' # 0xA6 -> BROKEN BAR u'\xa7' # 0xA7 -> SECTION SIGN u'\xa8' # 0xA8 -> DIAERESIS u'\xa9' # 0xA9 -> COPYRIGHT SIGN u'\u015e' # 0xAA -> LATIN CAPITAL LETTER S WITH CEDILLA u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK u'\xac' # 0xAC -> NOT SIGN u'\xad' # 0xAD -> SOFT HYPHEN u'\xae' # 0xAE -> REGISTERED SIGN u'\u017b' # 0xAF -> LATIN CAPITAL LETTER Z WITH DOT ABOVE u'\xb0' # 0xB0 -> DEGREE SIGN u'\xb1' # 0xB1 -> PLUS-MINUS SIGN u'\u02db' # 0xB2 -> OGONEK u'\u0142' # 0xB3 -> LATIN SMALL LETTER L WITH STROKE u'\xb4' # 0xB4 -> ACUTE ACCENT u'\xb5' # 0xB5 -> MICRO SIGN u'\xb6' # 0xB6 -> PILCROW SIGN u'\xb7' # 0xB7 -> MIDDLE DOT u'\xb8' # 0xB8 -> CEDILLA u'\u0105' # 0xB9 -> LATIN SMALL LETTER A WITH OGONEK u'\u015f' # 0xBA -> LATIN SMALL LETTER S WITH CEDILLA u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK u'\u013d' # 0xBC -> LATIN CAPITAL LETTER L WITH CARON u'\u02dd' # 0xBD -> DOUBLE ACUTE ACCENT u'\u013e' # 0xBE -> LATIN SMALL LETTER L WITH CARON u'\u017c' # 0xBF -> LATIN SMALL LETTER Z WITH DOT ABOVE u'\u0154' # 0xC0 -> LATIN CAPITAL LETTER R WITH ACUTE u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX u'\u0102' # 0xC3 -> LATIN CAPITAL LETTER A WITH BREVE u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS u'\u0139' # 0xC5 -> LATIN CAPITAL LETTER L WITH ACUTE u'\u0106' # 0xC6 -> LATIN CAPITAL LETTER C WITH ACUTE u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE u'\u0118' # 0xCA -> LATIN CAPITAL LETTER E WITH OGONEK u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS u'\u011a' # 0xCC -> LATIN CAPITAL LETTER E WITH CARON u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX u'\u010e' # 0xCF -> LATIN CAPITAL LETTER D WITH CARON u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE u'\u0143' # 0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE u'\u0147' # 0xD2 -> LATIN CAPITAL LETTER N WITH CARON u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX u'\u0150' # 0xD5 -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS u'\xd7' # 0xD7 -> MULTIPLICATION SIGN u'\u0158' # 0xD8 -> LATIN CAPITAL LETTER R WITH CARON u'\u016e' # 0xD9 -> LATIN CAPITAL LETTER U WITH RING ABOVE u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE u'\u0170' # 0xDB -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE u'\u0162' # 0xDE -> LATIN CAPITAL LETTER T WITH CEDILLA u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S u'\u0155' # 0xE0 -> LATIN SMALL LETTER R WITH ACUTE u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX u'\u0103' # 0xE3 -> LATIN SMALL LETTER A WITH BREVE u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS u'\u013a' # 0xE5 -> LATIN SMALL LETTER L WITH ACUTE u'\u0107' # 0xE6 -> LATIN SMALL LETTER C WITH ACUTE u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE u'\u0119' # 0xEA -> LATIN SMALL LETTER E WITH OGONEK u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS u'\u011b' # 0xEC -> LATIN SMALL LETTER E WITH CARON u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX u'\u010f' # 0xEF -> LATIN SMALL LETTER D WITH CARON u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE u'\u0144' # 0xF1 -> LATIN SMALL LETTER N WITH ACUTE u'\u0148' # 0xF2 -> LATIN SMALL LETTER N WITH CARON u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX u'\u0151' # 0xF5 -> LATIN SMALL LETTER O WITH DOUBLE ACUTE u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS u'\xf7' # 0xF7 -> DIVISION SIGN u'\u0159' # 0xF8 -> LATIN SMALL LETTER R WITH CARON u'\u016f' # 0xF9 -> LATIN SMALL LETTER U WITH RING ABOVE u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE u'\u0171' # 0xFB -> LATIN SMALL LETTER U WITH DOUBLE ACUTE u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE u'\u0163' # 0xFE -> LATIN SMALL LETTER T WITH CEDILLA u'\u02d9' # 0xFF -> DOT ABOVE ) ### Encoding table encoding_table=codecs.charmap_build(decoding_table) shift_jis_2004.py 0000644 00000002043 15053241622 0007544 0 ustar 00 # # shift_jis_2004.py: Python Unicode Codec for SHIFT_JIS_2004 # # Written by Hye-Shik Chang <perky@FreeBSD.org> # import _codecs_jp, codecs import _multibytecodec as mbc codec = _codecs_jp.getcodec('shift_jis_2004') class Codec(codecs.Codec): encode = codec.encode decode = codec.decode class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, codecs.IncrementalEncoder): codec = codec class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, codecs.IncrementalDecoder): codec = codec class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): codec = codec class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): codec = codec def getregentry(): return codecs.CodecInfo( name='shift_jis_2004', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) iso8859_13.py 0000644 00000032327 15053241622 0006560 0 ustar 00 """ Python Character Mapping Codec iso8859_13 generated from 'MAPPINGS/ISO8859/8859-13.TXT' with gencodec.py. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.charmap_decode(input,self.errors,decoding_table)[0] class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='iso8859-13', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) ### Decoding Table decoding_table = ( u'\x00' # 0x00 -> NULL u'\x01' # 0x01 -> START OF HEADING u'\x02' # 0x02 -> START OF TEXT u'\x03' # 0x03 -> END OF TEXT u'\x04' # 0x04 -> END OF TRANSMISSION u'\x05' # 0x05 -> ENQUIRY u'\x06' # 0x06 -> ACKNOWLEDGE u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION u'\n' # 0x0A -> LINE FEED u'\x0b' # 0x0B -> VERTICAL TABULATION u'\x0c' # 0x0C -> FORM FEED u'\r' # 0x0D -> CARRIAGE RETURN u'\x0e' # 0x0E -> SHIFT OUT u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO u'\x13' # 0x13 -> DEVICE CONTROL THREE u'\x14' # 0x14 -> DEVICE CONTROL FOUR u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE u'\x16' # 0x16 -> SYNCHRONOUS IDLE u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM u'\x1a' # 0x1A -> SUBSTITUTE u'\x1b' # 0x1B -> ESCAPE u'\x1c' # 0x1C -> FILE SEPARATOR u'\x1d' # 0x1D -> GROUP SEPARATOR u'\x1e' # 0x1E -> RECORD SEPARATOR u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK u'#' # 0x23 -> NUMBER SIGN u'$' # 0x24 -> DOLLAR SIGN u'%' # 0x25 -> PERCENT SIGN u'&' # 0x26 -> AMPERSAND u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS u'*' # 0x2A -> ASTERISK u'+' # 0x2B -> PLUS SIGN u',' # 0x2C -> COMMA u'-' # 0x2D -> HYPHEN-MINUS u'.' # 0x2E -> FULL STOP u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO u'3' # 0x33 -> DIGIT THREE u'4' # 0x34 -> DIGIT FOUR u'5' # 0x35 -> DIGIT FIVE u'6' # 0x36 -> DIGIT SIX u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE u':' # 0x3A -> COLON u';' # 0x3B -> SEMICOLON u'<' # 0x3C -> LESS-THAN SIGN u'=' # 0x3D -> EQUALS SIGN u'>' # 0x3E -> GREATER-THAN SIGN u'?' # 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B u'C' # 0x43 -> LATIN CAPITAL LETTER C u'D' # 0x44 -> LATIN CAPITAL LETTER D u'E' # 0x45 -> LATIN CAPITAL LETTER E u'F' # 0x46 -> LATIN CAPITAL LETTER F u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I u'J' # 0x4A -> LATIN CAPITAL LETTER J u'K' # 0x4B -> LATIN CAPITAL LETTER K u'L' # 0x4C -> LATIN CAPITAL LETTER L u'M' # 0x4D -> LATIN CAPITAL LETTER M u'N' # 0x4E -> LATIN CAPITAL LETTER N u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R u'S' # 0x53 -> LATIN CAPITAL LETTER S u'T' # 0x54 -> LATIN CAPITAL LETTER T u'U' # 0x55 -> LATIN CAPITAL LETTER U u'V' # 0x56 -> LATIN CAPITAL LETTER V u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y u'Z' # 0x5A -> LATIN CAPITAL LETTER Z u'[' # 0x5B -> LEFT SQUARE BRACKET u'\\' # 0x5C -> REVERSE SOLIDUS u']' # 0x5D -> RIGHT SQUARE BRACKET u'^' # 0x5E -> CIRCUMFLEX ACCENT u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B u'c' # 0x63 -> LATIN SMALL LETTER C u'd' # 0x64 -> LATIN SMALL LETTER D u'e' # 0x65 -> LATIN SMALL LETTER E u'f' # 0x66 -> LATIN SMALL LETTER F u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I u'j' # 0x6A -> LATIN SMALL LETTER J u'k' # 0x6B -> LATIN SMALL LETTER K u'l' # 0x6C -> LATIN SMALL LETTER L u'm' # 0x6D -> LATIN SMALL LETTER M u'n' # 0x6E -> LATIN SMALL LETTER N u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R u's' # 0x73 -> LATIN SMALL LETTER S u't' # 0x74 -> LATIN SMALL LETTER T u'u' # 0x75 -> LATIN SMALL LETTER U u'v' # 0x76 -> LATIN SMALL LETTER V u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y u'z' # 0x7A -> LATIN SMALL LETTER Z u'{' # 0x7B -> LEFT CURLY BRACKET u'|' # 0x7C -> VERTICAL LINE u'}' # 0x7D -> RIGHT CURLY BRACKET u'~' # 0x7E -> TILDE u'\x7f' # 0x7F -> DELETE u'\x80' # 0x80 -> <control> u'\x81' # 0x81 -> <control> u'\x82' # 0x82 -> <control> u'\x83' # 0x83 -> <control> u'\x84' # 0x84 -> <control> u'\x85' # 0x85 -> <control> u'\x86' # 0x86 -> <control> u'\x87' # 0x87 -> <control> u'\x88' # 0x88 -> <control> u'\x89' # 0x89 -> <control> u'\x8a' # 0x8A -> <control> u'\x8b' # 0x8B -> <control> u'\x8c' # 0x8C -> <control> u'\x8d' # 0x8D -> <control> u'\x8e' # 0x8E -> <control> u'\x8f' # 0x8F -> <control> u'\x90' # 0x90 -> <control> u'\x91' # 0x91 -> <control> u'\x92' # 0x92 -> <control> u'\x93' # 0x93 -> <control> u'\x94' # 0x94 -> <control> u'\x95' # 0x95 -> <control> u'\x96' # 0x96 -> <control> u'\x97' # 0x97 -> <control> u'\x98' # 0x98 -> <control> u'\x99' # 0x99 -> <control> u'\x9a' # 0x9A -> <control> u'\x9b' # 0x9B -> <control> u'\x9c' # 0x9C -> <control> u'\x9d' # 0x9D -> <control> u'\x9e' # 0x9E -> <control> u'\x9f' # 0x9F -> <control> u'\xa0' # 0xA0 -> NO-BREAK SPACE u'\u201d' # 0xA1 -> RIGHT DOUBLE QUOTATION MARK u'\xa2' # 0xA2 -> CENT SIGN u'\xa3' # 0xA3 -> POUND SIGN u'\xa4' # 0xA4 -> CURRENCY SIGN u'\u201e' # 0xA5 -> DOUBLE LOW-9 QUOTATION MARK u'\xa6' # 0xA6 -> BROKEN BAR u'\xa7' # 0xA7 -> SECTION SIGN u'\xd8' # 0xA8 -> LATIN CAPITAL LETTER O WITH STROKE u'\xa9' # 0xA9 -> COPYRIGHT SIGN u'\u0156' # 0xAA -> LATIN CAPITAL LETTER R WITH CEDILLA u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK u'\xac' # 0xAC -> NOT SIGN u'\xad' # 0xAD -> SOFT HYPHEN u'\xae' # 0xAE -> REGISTERED SIGN u'\xc6' # 0xAF -> LATIN CAPITAL LETTER AE u'\xb0' # 0xB0 -> DEGREE SIGN u'\xb1' # 0xB1 -> PLUS-MINUS SIGN u'\xb2' # 0xB2 -> SUPERSCRIPT TWO u'\xb3' # 0xB3 -> SUPERSCRIPT THREE u'\u201c' # 0xB4 -> LEFT DOUBLE QUOTATION MARK u'\xb5' # 0xB5 -> MICRO SIGN u'\xb6' # 0xB6 -> PILCROW SIGN u'\xb7' # 0xB7 -> MIDDLE DOT u'\xf8' # 0xB8 -> LATIN SMALL LETTER O WITH STROKE u'\xb9' # 0xB9 -> SUPERSCRIPT ONE u'\u0157' # 0xBA -> LATIN SMALL LETTER R WITH CEDILLA u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS u'\xe6' # 0xBF -> LATIN SMALL LETTER AE u'\u0104' # 0xC0 -> LATIN CAPITAL LETTER A WITH OGONEK u'\u012e' # 0xC1 -> LATIN CAPITAL LETTER I WITH OGONEK u'\u0100' # 0xC2 -> LATIN CAPITAL LETTER A WITH MACRON u'\u0106' # 0xC3 -> LATIN CAPITAL LETTER C WITH ACUTE u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE u'\u0118' # 0xC6 -> LATIN CAPITAL LETTER E WITH OGONEK u'\u0112' # 0xC7 -> LATIN CAPITAL LETTER E WITH MACRON u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE u'\u0179' # 0xCA -> LATIN CAPITAL LETTER Z WITH ACUTE u'\u0116' # 0xCB -> LATIN CAPITAL LETTER E WITH DOT ABOVE u'\u0122' # 0xCC -> LATIN CAPITAL LETTER G WITH CEDILLA u'\u0136' # 0xCD -> LATIN CAPITAL LETTER K WITH CEDILLA u'\u012a' # 0xCE -> LATIN CAPITAL LETTER I WITH MACRON u'\u013b' # 0xCF -> LATIN CAPITAL LETTER L WITH CEDILLA u'\u0160' # 0xD0 -> LATIN CAPITAL LETTER S WITH CARON u'\u0143' # 0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE u'\u0145' # 0xD2 -> LATIN CAPITAL LETTER N WITH CEDILLA u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE u'\u014c' # 0xD4 -> LATIN CAPITAL LETTER O WITH MACRON u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS u'\xd7' # 0xD7 -> MULTIPLICATION SIGN u'\u0172' # 0xD8 -> LATIN CAPITAL LETTER U WITH OGONEK u'\u0141' # 0xD9 -> LATIN CAPITAL LETTER L WITH STROKE u'\u015a' # 0xDA -> LATIN CAPITAL LETTER S WITH ACUTE u'\u016a' # 0xDB -> LATIN CAPITAL LETTER U WITH MACRON u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS u'\u017b' # 0xDD -> LATIN CAPITAL LETTER Z WITH DOT ABOVE u'\u017d' # 0xDE -> LATIN CAPITAL LETTER Z WITH CARON u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S (German) u'\u0105' # 0xE0 -> LATIN SMALL LETTER A WITH OGONEK u'\u012f' # 0xE1 -> LATIN SMALL LETTER I WITH OGONEK u'\u0101' # 0xE2 -> LATIN SMALL LETTER A WITH MACRON u'\u0107' # 0xE3 -> LATIN SMALL LETTER C WITH ACUTE u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE u'\u0119' # 0xE6 -> LATIN SMALL LETTER E WITH OGONEK u'\u0113' # 0xE7 -> LATIN SMALL LETTER E WITH MACRON u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE u'\u017a' # 0xEA -> LATIN SMALL LETTER Z WITH ACUTE u'\u0117' # 0xEB -> LATIN SMALL LETTER E WITH DOT ABOVE u'\u0123' # 0xEC -> LATIN SMALL LETTER G WITH CEDILLA u'\u0137' # 0xED -> LATIN SMALL LETTER K WITH CEDILLA u'\u012b' # 0xEE -> LATIN SMALL LETTER I WITH MACRON u'\u013c' # 0xEF -> LATIN SMALL LETTER L WITH CEDILLA u'\u0161' # 0xF0 -> LATIN SMALL LETTER S WITH CARON u'\u0144' # 0xF1 -> LATIN SMALL LETTER N WITH ACUTE u'\u0146' # 0xF2 -> LATIN SMALL LETTER N WITH CEDILLA u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE u'\u014d' # 0xF4 -> LATIN SMALL LETTER O WITH MACRON u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS u'\xf7' # 0xF7 -> DIVISION SIGN u'\u0173' # 0xF8 -> LATIN SMALL LETTER U WITH OGONEK u'\u0142' # 0xF9 -> LATIN SMALL LETTER L WITH STROKE u'\u015b' # 0xFA -> LATIN SMALL LETTER S WITH ACUTE u'\u016b' # 0xFB -> LATIN SMALL LETTER U WITH MACRON u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS u'\u017c' # 0xFD -> LATIN SMALL LETTER Z WITH DOT ABOVE u'\u017e' # 0xFE -> LATIN SMALL LETTER Z WITH CARON u'\u2019' # 0xFF -> RIGHT SINGLE QUOTATION MARK ) ### Encoding table encoding_table=codecs.charmap_build(decoding_table) cp874.py 0000644 00000031063 15053241622 0005766 0 ustar 00 """ Python Character Mapping Codec cp874 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP874.TXT' with gencodec.py. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.charmap_decode(input,self.errors,decoding_table)[0] class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='cp874', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) ### Decoding Table decoding_table = ( u'\x00' # 0x00 -> NULL u'\x01' # 0x01 -> START OF HEADING u'\x02' # 0x02 -> START OF TEXT u'\x03' # 0x03 -> END OF TEXT u'\x04' # 0x04 -> END OF TRANSMISSION u'\x05' # 0x05 -> ENQUIRY u'\x06' # 0x06 -> ACKNOWLEDGE u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION u'\n' # 0x0A -> LINE FEED u'\x0b' # 0x0B -> VERTICAL TABULATION u'\x0c' # 0x0C -> FORM FEED u'\r' # 0x0D -> CARRIAGE RETURN u'\x0e' # 0x0E -> SHIFT OUT u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO u'\x13' # 0x13 -> DEVICE CONTROL THREE u'\x14' # 0x14 -> DEVICE CONTROL FOUR u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE u'\x16' # 0x16 -> SYNCHRONOUS IDLE u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM u'\x1a' # 0x1A -> SUBSTITUTE u'\x1b' # 0x1B -> ESCAPE u'\x1c' # 0x1C -> FILE SEPARATOR u'\x1d' # 0x1D -> GROUP SEPARATOR u'\x1e' # 0x1E -> RECORD SEPARATOR u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK u'#' # 0x23 -> NUMBER SIGN u'$' # 0x24 -> DOLLAR SIGN u'%' # 0x25 -> PERCENT SIGN u'&' # 0x26 -> AMPERSAND u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS u'*' # 0x2A -> ASTERISK u'+' # 0x2B -> PLUS SIGN u',' # 0x2C -> COMMA u'-' # 0x2D -> HYPHEN-MINUS u'.' # 0x2E -> FULL STOP u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO u'3' # 0x33 -> DIGIT THREE u'4' # 0x34 -> DIGIT FOUR u'5' # 0x35 -> DIGIT FIVE u'6' # 0x36 -> DIGIT SIX u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE u':' # 0x3A -> COLON u';' # 0x3B -> SEMICOLON u'<' # 0x3C -> LESS-THAN SIGN u'=' # 0x3D -> EQUALS SIGN u'>' # 0x3E -> GREATER-THAN SIGN u'?' # 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B u'C' # 0x43 -> LATIN CAPITAL LETTER C u'D' # 0x44 -> LATIN CAPITAL LETTER D u'E' # 0x45 -> LATIN CAPITAL LETTER E u'F' # 0x46 -> LATIN CAPITAL LETTER F u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I u'J' # 0x4A -> LATIN CAPITAL LETTER J u'K' # 0x4B -> LATIN CAPITAL LETTER K u'L' # 0x4C -> LATIN CAPITAL LETTER L u'M' # 0x4D -> LATIN CAPITAL LETTER M u'N' # 0x4E -> LATIN CAPITAL LETTER N u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R u'S' # 0x53 -> LATIN CAPITAL LETTER S u'T' # 0x54 -> LATIN CAPITAL LETTER T u'U' # 0x55 -> LATIN CAPITAL LETTER U u'V' # 0x56 -> LATIN CAPITAL LETTER V u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y u'Z' # 0x5A -> LATIN CAPITAL LETTER Z u'[' # 0x5B -> LEFT SQUARE BRACKET u'\\' # 0x5C -> REVERSE SOLIDUS u']' # 0x5D -> RIGHT SQUARE BRACKET u'^' # 0x5E -> CIRCUMFLEX ACCENT u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B u'c' # 0x63 -> LATIN SMALL LETTER C u'd' # 0x64 -> LATIN SMALL LETTER D u'e' # 0x65 -> LATIN SMALL LETTER E u'f' # 0x66 -> LATIN SMALL LETTER F u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I u'j' # 0x6A -> LATIN SMALL LETTER J u'k' # 0x6B -> LATIN SMALL LETTER K u'l' # 0x6C -> LATIN SMALL LETTER L u'm' # 0x6D -> LATIN SMALL LETTER M u'n' # 0x6E -> LATIN SMALL LETTER N u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R u's' # 0x73 -> LATIN SMALL LETTER S u't' # 0x74 -> LATIN SMALL LETTER T u'u' # 0x75 -> LATIN SMALL LETTER U u'v' # 0x76 -> LATIN SMALL LETTER V u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y u'z' # 0x7A -> LATIN SMALL LETTER Z u'{' # 0x7B -> LEFT CURLY BRACKET u'|' # 0x7C -> VERTICAL LINE u'}' # 0x7D -> RIGHT CURLY BRACKET u'~' # 0x7E -> TILDE u'\x7f' # 0x7F -> DELETE u'\u20ac' # 0x80 -> EURO SIGN u'\ufffe' # 0x81 -> UNDEFINED u'\ufffe' # 0x82 -> UNDEFINED u'\ufffe' # 0x83 -> UNDEFINED u'\ufffe' # 0x84 -> UNDEFINED u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS u'\ufffe' # 0x86 -> UNDEFINED u'\ufffe' # 0x87 -> UNDEFINED u'\ufffe' # 0x88 -> UNDEFINED u'\ufffe' # 0x89 -> UNDEFINED u'\ufffe' # 0x8A -> UNDEFINED u'\ufffe' # 0x8B -> UNDEFINED u'\ufffe' # 0x8C -> UNDEFINED u'\ufffe' # 0x8D -> UNDEFINED u'\ufffe' # 0x8E -> UNDEFINED u'\ufffe' # 0x8F -> UNDEFINED u'\ufffe' # 0x90 -> UNDEFINED u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK u'\u2022' # 0x95 -> BULLET u'\u2013' # 0x96 -> EN DASH u'\u2014' # 0x97 -> EM DASH u'\ufffe' # 0x98 -> UNDEFINED u'\ufffe' # 0x99 -> UNDEFINED u'\ufffe' # 0x9A -> UNDEFINED u'\ufffe' # 0x9B -> UNDEFINED u'\ufffe' # 0x9C -> UNDEFINED u'\ufffe' # 0x9D -> UNDEFINED u'\ufffe' # 0x9E -> UNDEFINED u'\ufffe' # 0x9F -> UNDEFINED u'\xa0' # 0xA0 -> NO-BREAK SPACE u'\u0e01' # 0xA1 -> THAI CHARACTER KO KAI u'\u0e02' # 0xA2 -> THAI CHARACTER KHO KHAI u'\u0e03' # 0xA3 -> THAI CHARACTER KHO KHUAT u'\u0e04' # 0xA4 -> THAI CHARACTER KHO KHWAI u'\u0e05' # 0xA5 -> THAI CHARACTER KHO KHON u'\u0e06' # 0xA6 -> THAI CHARACTER KHO RAKHANG u'\u0e07' # 0xA7 -> THAI CHARACTER NGO NGU u'\u0e08' # 0xA8 -> THAI CHARACTER CHO CHAN u'\u0e09' # 0xA9 -> THAI CHARACTER CHO CHING u'\u0e0a' # 0xAA -> THAI CHARACTER CHO CHANG u'\u0e0b' # 0xAB -> THAI CHARACTER SO SO u'\u0e0c' # 0xAC -> THAI CHARACTER CHO CHOE u'\u0e0d' # 0xAD -> THAI CHARACTER YO YING u'\u0e0e' # 0xAE -> THAI CHARACTER DO CHADA u'\u0e0f' # 0xAF -> THAI CHARACTER TO PATAK u'\u0e10' # 0xB0 -> THAI CHARACTER THO THAN u'\u0e11' # 0xB1 -> THAI CHARACTER THO NANGMONTHO u'\u0e12' # 0xB2 -> THAI CHARACTER THO PHUTHAO u'\u0e13' # 0xB3 -> THAI CHARACTER NO NEN u'\u0e14' # 0xB4 -> THAI CHARACTER DO DEK u'\u0e15' # 0xB5 -> THAI CHARACTER TO TAO u'\u0e16' # 0xB6 -> THAI CHARACTER THO THUNG u'\u0e17' # 0xB7 -> THAI CHARACTER THO THAHAN u'\u0e18' # 0xB8 -> THAI CHARACTER THO THONG u'\u0e19' # 0xB9 -> THAI CHARACTER NO NU u'\u0e1a' # 0xBA -> THAI CHARACTER BO BAIMAI u'\u0e1b' # 0xBB -> THAI CHARACTER PO PLA u'\u0e1c' # 0xBC -> THAI CHARACTER PHO PHUNG u'\u0e1d' # 0xBD -> THAI CHARACTER FO FA u'\u0e1e' # 0xBE -> THAI CHARACTER PHO PHAN u'\u0e1f' # 0xBF -> THAI CHARACTER FO FAN u'\u0e20' # 0xC0 -> THAI CHARACTER PHO SAMPHAO u'\u0e21' # 0xC1 -> THAI CHARACTER MO MA u'\u0e22' # 0xC2 -> THAI CHARACTER YO YAK u'\u0e23' # 0xC3 -> THAI CHARACTER RO RUA u'\u0e24' # 0xC4 -> THAI CHARACTER RU u'\u0e25' # 0xC5 -> THAI CHARACTER LO LING u'\u0e26' # 0xC6 -> THAI CHARACTER LU u'\u0e27' # 0xC7 -> THAI CHARACTER WO WAEN u'\u0e28' # 0xC8 -> THAI CHARACTER SO SALA u'\u0e29' # 0xC9 -> THAI CHARACTER SO RUSI u'\u0e2a' # 0xCA -> THAI CHARACTER SO SUA u'\u0e2b' # 0xCB -> THAI CHARACTER HO HIP u'\u0e2c' # 0xCC -> THAI CHARACTER LO CHULA u'\u0e2d' # 0xCD -> THAI CHARACTER O ANG u'\u0e2e' # 0xCE -> THAI CHARACTER HO NOKHUK u'\u0e2f' # 0xCF -> THAI CHARACTER PAIYANNOI u'\u0e30' # 0xD0 -> THAI CHARACTER SARA A u'\u0e31' # 0xD1 -> THAI CHARACTER MAI HAN-AKAT u'\u0e32' # 0xD2 -> THAI CHARACTER SARA AA u'\u0e33' # 0xD3 -> THAI CHARACTER SARA AM u'\u0e34' # 0xD4 -> THAI CHARACTER SARA I u'\u0e35' # 0xD5 -> THAI CHARACTER SARA II u'\u0e36' # 0xD6 -> THAI CHARACTER SARA UE u'\u0e37' # 0xD7 -> THAI CHARACTER SARA UEE u'\u0e38' # 0xD8 -> THAI CHARACTER SARA U u'\u0e39' # 0xD9 -> THAI CHARACTER SARA UU u'\u0e3a' # 0xDA -> THAI CHARACTER PHINTHU u'\ufffe' # 0xDB -> UNDEFINED u'\ufffe' # 0xDC -> UNDEFINED u'\ufffe' # 0xDD -> UNDEFINED u'\ufffe' # 0xDE -> UNDEFINED u'\u0e3f' # 0xDF -> THAI CURRENCY SYMBOL BAHT u'\u0e40' # 0xE0 -> THAI CHARACTER SARA E u'\u0e41' # 0xE1 -> THAI CHARACTER SARA AE u'\u0e42' # 0xE2 -> THAI CHARACTER SARA O u'\u0e43' # 0xE3 -> THAI CHARACTER SARA AI MAIMUAN u'\u0e44' # 0xE4 -> THAI CHARACTER SARA AI MAIMALAI u'\u0e45' # 0xE5 -> THAI CHARACTER LAKKHANGYAO u'\u0e46' # 0xE6 -> THAI CHARACTER MAIYAMOK u'\u0e47' # 0xE7 -> THAI CHARACTER MAITAIKHU u'\u0e48' # 0xE8 -> THAI CHARACTER MAI EK u'\u0e49' # 0xE9 -> THAI CHARACTER MAI THO u'\u0e4a' # 0xEA -> THAI CHARACTER MAI TRI u'\u0e4b' # 0xEB -> THAI CHARACTER MAI CHATTAWA u'\u0e4c' # 0xEC -> THAI CHARACTER THANTHAKHAT u'\u0e4d' # 0xED -> THAI CHARACTER NIKHAHIT u'\u0e4e' # 0xEE -> THAI CHARACTER YAMAKKAN u'\u0e4f' # 0xEF -> THAI CHARACTER FONGMAN u'\u0e50' # 0xF0 -> THAI DIGIT ZERO u'\u0e51' # 0xF1 -> THAI DIGIT ONE u'\u0e52' # 0xF2 -> THAI DIGIT TWO u'\u0e53' # 0xF3 -> THAI DIGIT THREE u'\u0e54' # 0xF4 -> THAI DIGIT FOUR u'\u0e55' # 0xF5 -> THAI DIGIT FIVE u'\u0e56' # 0xF6 -> THAI DIGIT SIX u'\u0e57' # 0xF7 -> THAI DIGIT SEVEN u'\u0e58' # 0xF8 -> THAI DIGIT EIGHT u'\u0e59' # 0xF9 -> THAI DIGIT NINE u'\u0e5a' # 0xFA -> THAI CHARACTER ANGKHANKHU u'\u0e5b' # 0xFB -> THAI CHARACTER KHOMUT u'\ufffe' # 0xFC -> UNDEFINED u'\ufffe' # 0xFD -> UNDEFINED u'\ufffe' # 0xFE -> UNDEFINED u'\ufffe' # 0xFF -> UNDEFINED ) ### Encoding table encoding_table=codecs.charmap_build(decoding_table) mac_iceland.py 0000644 00000032672 15053241622 0007347 0 ustar 00 """ Python Character Mapping Codec mac_iceland generated from 'MAPPINGS/VENDORS/APPLE/ICELAND.TXT' with gencodec.py. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.charmap_decode(input,self.errors,decoding_table)[0] class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='mac-iceland', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) ### Decoding Table decoding_table = ( u'\x00' # 0x00 -> CONTROL CHARACTER u'\x01' # 0x01 -> CONTROL CHARACTER u'\x02' # 0x02 -> CONTROL CHARACTER u'\x03' # 0x03 -> CONTROL CHARACTER u'\x04' # 0x04 -> CONTROL CHARACTER u'\x05' # 0x05 -> CONTROL CHARACTER u'\x06' # 0x06 -> CONTROL CHARACTER u'\x07' # 0x07 -> CONTROL CHARACTER u'\x08' # 0x08 -> CONTROL CHARACTER u'\t' # 0x09 -> CONTROL CHARACTER u'\n' # 0x0A -> CONTROL CHARACTER u'\x0b' # 0x0B -> CONTROL CHARACTER u'\x0c' # 0x0C -> CONTROL CHARACTER u'\r' # 0x0D -> CONTROL CHARACTER u'\x0e' # 0x0E -> CONTROL CHARACTER u'\x0f' # 0x0F -> CONTROL CHARACTER u'\x10' # 0x10 -> CONTROL CHARACTER u'\x11' # 0x11 -> CONTROL CHARACTER u'\x12' # 0x12 -> CONTROL CHARACTER u'\x13' # 0x13 -> CONTROL CHARACTER u'\x14' # 0x14 -> CONTROL CHARACTER u'\x15' # 0x15 -> CONTROL CHARACTER u'\x16' # 0x16 -> CONTROL CHARACTER u'\x17' # 0x17 -> CONTROL CHARACTER u'\x18' # 0x18 -> CONTROL CHARACTER u'\x19' # 0x19 -> CONTROL CHARACTER u'\x1a' # 0x1A -> CONTROL CHARACTER u'\x1b' # 0x1B -> CONTROL CHARACTER u'\x1c' # 0x1C -> CONTROL CHARACTER u'\x1d' # 0x1D -> CONTROL CHARACTER u'\x1e' # 0x1E -> CONTROL CHARACTER u'\x1f' # 0x1F -> CONTROL CHARACTER u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK u'#' # 0x23 -> NUMBER SIGN u'$' # 0x24 -> DOLLAR SIGN u'%' # 0x25 -> PERCENT SIGN u'&' # 0x26 -> AMPERSAND u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS u'*' # 0x2A -> ASTERISK u'+' # 0x2B -> PLUS SIGN u',' # 0x2C -> COMMA u'-' # 0x2D -> HYPHEN-MINUS u'.' # 0x2E -> FULL STOP u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO u'3' # 0x33 -> DIGIT THREE u'4' # 0x34 -> DIGIT FOUR u'5' # 0x35 -> DIGIT FIVE u'6' # 0x36 -> DIGIT SIX u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE u':' # 0x3A -> COLON u';' # 0x3B -> SEMICOLON u'<' # 0x3C -> LESS-THAN SIGN u'=' # 0x3D -> EQUALS SIGN u'>' # 0x3E -> GREATER-THAN SIGN u'?' # 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B u'C' # 0x43 -> LATIN CAPITAL LETTER C u'D' # 0x44 -> LATIN CAPITAL LETTER D u'E' # 0x45 -> LATIN CAPITAL LETTER E u'F' # 0x46 -> LATIN CAPITAL LETTER F u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I u'J' # 0x4A -> LATIN CAPITAL LETTER J u'K' # 0x4B -> LATIN CAPITAL LETTER K u'L' # 0x4C -> LATIN CAPITAL LETTER L u'M' # 0x4D -> LATIN CAPITAL LETTER M u'N' # 0x4E -> LATIN CAPITAL LETTER N u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R u'S' # 0x53 -> LATIN CAPITAL LETTER S u'T' # 0x54 -> LATIN CAPITAL LETTER T u'U' # 0x55 -> LATIN CAPITAL LETTER U u'V' # 0x56 -> LATIN CAPITAL LETTER V u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y u'Z' # 0x5A -> LATIN CAPITAL LETTER Z u'[' # 0x5B -> LEFT SQUARE BRACKET u'\\' # 0x5C -> REVERSE SOLIDUS u']' # 0x5D -> RIGHT SQUARE BRACKET u'^' # 0x5E -> CIRCUMFLEX ACCENT u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B u'c' # 0x63 -> LATIN SMALL LETTER C u'd' # 0x64 -> LATIN SMALL LETTER D u'e' # 0x65 -> LATIN SMALL LETTER E u'f' # 0x66 -> LATIN SMALL LETTER F u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I u'j' # 0x6A -> LATIN SMALL LETTER J u'k' # 0x6B -> LATIN SMALL LETTER K u'l' # 0x6C -> LATIN SMALL LETTER L u'm' # 0x6D -> LATIN SMALL LETTER M u'n' # 0x6E -> LATIN SMALL LETTER N u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R u's' # 0x73 -> LATIN SMALL LETTER S u't' # 0x74 -> LATIN SMALL LETTER T u'u' # 0x75 -> LATIN SMALL LETTER U u'v' # 0x76 -> LATIN SMALL LETTER V u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y u'z' # 0x7A -> LATIN SMALL LETTER Z u'{' # 0x7B -> LEFT CURLY BRACKET u'|' # 0x7C -> VERTICAL LINE u'}' # 0x7D -> RIGHT CURLY BRACKET u'~' # 0x7E -> TILDE u'\x7f' # 0x7F -> CONTROL CHARACTER u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS u'\xc5' # 0x81 -> LATIN CAPITAL LETTER A WITH RING ABOVE u'\xc7' # 0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE u'\xd1' # 0x84 -> LATIN CAPITAL LETTER N WITH TILDE u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS u'\xe3' # 0x8B -> LATIN SMALL LETTER A WITH TILDE u'\xe5' # 0x8C -> LATIN SMALL LETTER A WITH RING ABOVE u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE u'\xec' # 0x93 -> LATIN SMALL LETTER I WITH GRAVE u'\xee' # 0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX u'\xef' # 0x95 -> LATIN SMALL LETTER I WITH DIAERESIS u'\xf1' # 0x96 -> LATIN SMALL LETTER N WITH TILDE u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE u'\xf2' # 0x98 -> LATIN SMALL LETTER O WITH GRAVE u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS u'\xf5' # 0x9B -> LATIN SMALL LETTER O WITH TILDE u'\xfa' # 0x9C -> LATIN SMALL LETTER U WITH ACUTE u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS u'\xdd' # 0xA0 -> LATIN CAPITAL LETTER Y WITH ACUTE u'\xb0' # 0xA1 -> DEGREE SIGN u'\xa2' # 0xA2 -> CENT SIGN u'\xa3' # 0xA3 -> POUND SIGN u'\xa7' # 0xA4 -> SECTION SIGN u'\u2022' # 0xA5 -> BULLET u'\xb6' # 0xA6 -> PILCROW SIGN u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S u'\xae' # 0xA8 -> REGISTERED SIGN u'\xa9' # 0xA9 -> COPYRIGHT SIGN u'\u2122' # 0xAA -> TRADE MARK SIGN u'\xb4' # 0xAB -> ACUTE ACCENT u'\xa8' # 0xAC -> DIAERESIS u'\u2260' # 0xAD -> NOT EQUAL TO u'\xc6' # 0xAE -> LATIN CAPITAL LETTER AE u'\xd8' # 0xAF -> LATIN CAPITAL LETTER O WITH STROKE u'\u221e' # 0xB0 -> INFINITY u'\xb1' # 0xB1 -> PLUS-MINUS SIGN u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO u'\xa5' # 0xB4 -> YEN SIGN u'\xb5' # 0xB5 -> MICRO SIGN u'\u2202' # 0xB6 -> PARTIAL DIFFERENTIAL u'\u2211' # 0xB7 -> N-ARY SUMMATION u'\u220f' # 0xB8 -> N-ARY PRODUCT u'\u03c0' # 0xB9 -> GREEK SMALL LETTER PI u'\u222b' # 0xBA -> INTEGRAL u'\xaa' # 0xBB -> FEMININE ORDINAL INDICATOR u'\xba' # 0xBC -> MASCULINE ORDINAL INDICATOR u'\u03a9' # 0xBD -> GREEK CAPITAL LETTER OMEGA u'\xe6' # 0xBE -> LATIN SMALL LETTER AE u'\xf8' # 0xBF -> LATIN SMALL LETTER O WITH STROKE u'\xbf' # 0xC0 -> INVERTED QUESTION MARK u'\xa1' # 0xC1 -> INVERTED EXCLAMATION MARK u'\xac' # 0xC2 -> NOT SIGN u'\u221a' # 0xC3 -> SQUARE ROOT u'\u0192' # 0xC4 -> LATIN SMALL LETTER F WITH HOOK u'\u2248' # 0xC5 -> ALMOST EQUAL TO u'\u2206' # 0xC6 -> INCREMENT u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS u'\xa0' # 0xCA -> NO-BREAK SPACE u'\xc0' # 0xCB -> LATIN CAPITAL LETTER A WITH GRAVE u'\xc3' # 0xCC -> LATIN CAPITAL LETTER A WITH TILDE u'\xd5' # 0xCD -> LATIN CAPITAL LETTER O WITH TILDE u'\u0152' # 0xCE -> LATIN CAPITAL LIGATURE OE u'\u0153' # 0xCF -> LATIN SMALL LIGATURE OE u'\u2013' # 0xD0 -> EN DASH u'\u2014' # 0xD1 -> EM DASH u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK u'\xf7' # 0xD6 -> DIVISION SIGN u'\u25ca' # 0xD7 -> LOZENGE u'\xff' # 0xD8 -> LATIN SMALL LETTER Y WITH DIAERESIS u'\u0178' # 0xD9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS u'\u2044' # 0xDA -> FRACTION SLASH u'\u20ac' # 0xDB -> EURO SIGN u'\xd0' # 0xDC -> LATIN CAPITAL LETTER ETH u'\xf0' # 0xDD -> LATIN SMALL LETTER ETH u'\xde' # 0xDE -> LATIN CAPITAL LETTER THORN u'\xfe' # 0xDF -> LATIN SMALL LETTER THORN u'\xfd' # 0xE0 -> LATIN SMALL LETTER Y WITH ACUTE u'\xb7' # 0xE1 -> MIDDLE DOT u'\u201a' # 0xE2 -> SINGLE LOW-9 QUOTATION MARK u'\u201e' # 0xE3 -> DOUBLE LOW-9 QUOTATION MARK u'\u2030' # 0xE4 -> PER MILLE SIGN u'\xc2' # 0xE5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX u'\xca' # 0xE6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX u'\xc1' # 0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE u'\xcb' # 0xE8 -> LATIN CAPITAL LETTER E WITH DIAERESIS u'\xc8' # 0xE9 -> LATIN CAPITAL LETTER E WITH GRAVE u'\xcd' # 0xEA -> LATIN CAPITAL LETTER I WITH ACUTE u'\xce' # 0xEB -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX u'\xcf' # 0xEC -> LATIN CAPITAL LETTER I WITH DIAERESIS u'\xcc' # 0xED -> LATIN CAPITAL LETTER I WITH GRAVE u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE u'\xd4' # 0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX u'\uf8ff' # 0xF0 -> Apple logo u'\xd2' # 0xF1 -> LATIN CAPITAL LETTER O WITH GRAVE u'\xda' # 0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE u'\xdb' # 0xF3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX u'\xd9' # 0xF4 -> LATIN CAPITAL LETTER U WITH GRAVE u'\u0131' # 0xF5 -> LATIN SMALL LETTER DOTLESS I u'\u02c6' # 0xF6 -> MODIFIER LETTER CIRCUMFLEX ACCENT u'\u02dc' # 0xF7 -> SMALL TILDE u'\xaf' # 0xF8 -> MACRON u'\u02d8' # 0xF9 -> BREVE u'\u02d9' # 0xFA -> DOT ABOVE u'\u02da' # 0xFB -> RING ABOVE u'\xb8' # 0xFC -> CEDILLA u'\u02dd' # 0xFD -> DOUBLE ACUTE ACCENT u'\u02db' # 0xFE -> OGONEK u'\u02c7' # 0xFF -> CARON ) ### Encoding table encoding_table=codecs.charmap_build(decoding_table) mac_arabic.py 0000644 00000107563 15053241622 0007173 0 ustar 00 """ Python Character Mapping Codec generated from 'VENDORS/APPLE/ARABIC.TXT' with gencodec.py. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.charmap_encode(input,self.errors,encoding_map)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.charmap_decode(input,self.errors,decoding_table)[0] class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='mac-arabic', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS 0x0081: 0x00a0, # NO-BREAK SPACE, right-left 0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE 0x0084: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS 0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS 0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE 0x0088: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE 0x0089: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX 0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS 0x008b: 0x06ba, # ARABIC LETTER NOON GHUNNA 0x008c: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left 0x008d: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA 0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE 0x008f: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE 0x0090: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX 0x0091: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS 0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE 0x0093: 0x2026, # HORIZONTAL ELLIPSIS, right-left 0x0094: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX 0x0095: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS 0x0096: 0x00f1, # LATIN SMALL LETTER N WITH TILDE 0x0097: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE 0x0098: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left 0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX 0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS 0x009b: 0x00f7, # DIVISION SIGN, right-left 0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE 0x009d: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE 0x009e: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX 0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x00a0: 0x0020, # SPACE, right-left 0x00a1: 0x0021, # EXCLAMATION MARK, right-left 0x00a2: 0x0022, # QUOTATION MARK, right-left 0x00a3: 0x0023, # NUMBER SIGN, right-left 0x00a4: 0x0024, # DOLLAR SIGN, right-left 0x00a5: 0x066a, # ARABIC PERCENT SIGN 0x00a6: 0x0026, # AMPERSAND, right-left 0x00a7: 0x0027, # APOSTROPHE, right-left 0x00a8: 0x0028, # LEFT PARENTHESIS, right-left 0x00a9: 0x0029, # RIGHT PARENTHESIS, right-left 0x00aa: 0x002a, # ASTERISK, right-left 0x00ab: 0x002b, # PLUS SIGN, right-left 0x00ac: 0x060c, # ARABIC COMMA 0x00ad: 0x002d, # HYPHEN-MINUS, right-left 0x00ae: 0x002e, # FULL STOP, right-left 0x00af: 0x002f, # SOLIDUS, right-left 0x00b0: 0x0660, # ARABIC-INDIC DIGIT ZERO, right-left (need override) 0x00b1: 0x0661, # ARABIC-INDIC DIGIT ONE, right-left (need override) 0x00b2: 0x0662, # ARABIC-INDIC DIGIT TWO, right-left (need override) 0x00b3: 0x0663, # ARABIC-INDIC DIGIT THREE, right-left (need override) 0x00b4: 0x0664, # ARABIC-INDIC DIGIT FOUR, right-left (need override) 0x00b5: 0x0665, # ARABIC-INDIC DIGIT FIVE, right-left (need override) 0x00b6: 0x0666, # ARABIC-INDIC DIGIT SIX, right-left (need override) 0x00b7: 0x0667, # ARABIC-INDIC DIGIT SEVEN, right-left (need override) 0x00b8: 0x0668, # ARABIC-INDIC DIGIT EIGHT, right-left (need override) 0x00b9: 0x0669, # ARABIC-INDIC DIGIT NINE, right-left (need override) 0x00ba: 0x003a, # COLON, right-left 0x00bb: 0x061b, # ARABIC SEMICOLON 0x00bc: 0x003c, # LESS-THAN SIGN, right-left 0x00bd: 0x003d, # EQUALS SIGN, right-left 0x00be: 0x003e, # GREATER-THAN SIGN, right-left 0x00bf: 0x061f, # ARABIC QUESTION MARK 0x00c0: 0x274a, # EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, right-left 0x00c1: 0x0621, # ARABIC LETTER HAMZA 0x00c2: 0x0622, # ARABIC LETTER ALEF WITH MADDA ABOVE 0x00c3: 0x0623, # ARABIC LETTER ALEF WITH HAMZA ABOVE 0x00c4: 0x0624, # ARABIC LETTER WAW WITH HAMZA ABOVE 0x00c5: 0x0625, # ARABIC LETTER ALEF WITH HAMZA BELOW 0x00c6: 0x0626, # ARABIC LETTER YEH WITH HAMZA ABOVE 0x00c7: 0x0627, # ARABIC LETTER ALEF 0x00c8: 0x0628, # ARABIC LETTER BEH 0x00c9: 0x0629, # ARABIC LETTER TEH MARBUTA 0x00ca: 0x062a, # ARABIC LETTER TEH 0x00cb: 0x062b, # ARABIC LETTER THEH 0x00cc: 0x062c, # ARABIC LETTER JEEM 0x00cd: 0x062d, # ARABIC LETTER HAH 0x00ce: 0x062e, # ARABIC LETTER KHAH 0x00cf: 0x062f, # ARABIC LETTER DAL 0x00d0: 0x0630, # ARABIC LETTER THAL 0x00d1: 0x0631, # ARABIC LETTER REH 0x00d2: 0x0632, # ARABIC LETTER ZAIN 0x00d3: 0x0633, # ARABIC LETTER SEEN 0x00d4: 0x0634, # ARABIC LETTER SHEEN 0x00d5: 0x0635, # ARABIC LETTER SAD 0x00d6: 0x0636, # ARABIC LETTER DAD 0x00d7: 0x0637, # ARABIC LETTER TAH 0x00d8: 0x0638, # ARABIC LETTER ZAH 0x00d9: 0x0639, # ARABIC LETTER AIN 0x00da: 0x063a, # ARABIC LETTER GHAIN 0x00db: 0x005b, # LEFT SQUARE BRACKET, right-left 0x00dc: 0x005c, # REVERSE SOLIDUS, right-left 0x00dd: 0x005d, # RIGHT SQUARE BRACKET, right-left 0x00de: 0x005e, # CIRCUMFLEX ACCENT, right-left 0x00df: 0x005f, # LOW LINE, right-left 0x00e0: 0x0640, # ARABIC TATWEEL 0x00e1: 0x0641, # ARABIC LETTER FEH 0x00e2: 0x0642, # ARABIC LETTER QAF 0x00e3: 0x0643, # ARABIC LETTER KAF 0x00e4: 0x0644, # ARABIC LETTER LAM 0x00e5: 0x0645, # ARABIC LETTER MEEM 0x00e6: 0x0646, # ARABIC LETTER NOON 0x00e7: 0x0647, # ARABIC LETTER HEH 0x00e8: 0x0648, # ARABIC LETTER WAW 0x00e9: 0x0649, # ARABIC LETTER ALEF MAKSURA 0x00ea: 0x064a, # ARABIC LETTER YEH 0x00eb: 0x064b, # ARABIC FATHATAN 0x00ec: 0x064c, # ARABIC DAMMATAN 0x00ed: 0x064d, # ARABIC KASRATAN 0x00ee: 0x064e, # ARABIC FATHA 0x00ef: 0x064f, # ARABIC DAMMA 0x00f0: 0x0650, # ARABIC KASRA 0x00f1: 0x0651, # ARABIC SHADDA 0x00f2: 0x0652, # ARABIC SUKUN 0x00f3: 0x067e, # ARABIC LETTER PEH 0x00f4: 0x0679, # ARABIC LETTER TTEH 0x00f5: 0x0686, # ARABIC LETTER TCHEH 0x00f6: 0x06d5, # ARABIC LETTER AE 0x00f7: 0x06a4, # ARABIC LETTER VEH 0x00f8: 0x06af, # ARABIC LETTER GAF 0x00f9: 0x0688, # ARABIC LETTER DDAL 0x00fa: 0x0691, # ARABIC LETTER RREH 0x00fb: 0x007b, # LEFT CURLY BRACKET, right-left 0x00fc: 0x007c, # VERTICAL LINE, right-left 0x00fd: 0x007d, # RIGHT CURLY BRACKET, right-left 0x00fe: 0x0698, # ARABIC LETTER JEH 0x00ff: 0x06d2, # ARABIC LETTER YEH BARREE }) ### Decoding Table decoding_table = ( u'\x00' # 0x0000 -> CONTROL CHARACTER u'\x01' # 0x0001 -> CONTROL CHARACTER u'\x02' # 0x0002 -> CONTROL CHARACTER u'\x03' # 0x0003 -> CONTROL CHARACTER u'\x04' # 0x0004 -> CONTROL CHARACTER u'\x05' # 0x0005 -> CONTROL CHARACTER u'\x06' # 0x0006 -> CONTROL CHARACTER u'\x07' # 0x0007 -> CONTROL CHARACTER u'\x08' # 0x0008 -> CONTROL CHARACTER u'\t' # 0x0009 -> CONTROL CHARACTER u'\n' # 0x000a -> CONTROL CHARACTER u'\x0b' # 0x000b -> CONTROL CHARACTER u'\x0c' # 0x000c -> CONTROL CHARACTER u'\r' # 0x000d -> CONTROL CHARACTER u'\x0e' # 0x000e -> CONTROL CHARACTER u'\x0f' # 0x000f -> CONTROL CHARACTER u'\x10' # 0x0010 -> CONTROL CHARACTER u'\x11' # 0x0011 -> CONTROL CHARACTER u'\x12' # 0x0012 -> CONTROL CHARACTER u'\x13' # 0x0013 -> CONTROL CHARACTER u'\x14' # 0x0014 -> CONTROL CHARACTER u'\x15' # 0x0015 -> CONTROL CHARACTER u'\x16' # 0x0016 -> CONTROL CHARACTER u'\x17' # 0x0017 -> CONTROL CHARACTER u'\x18' # 0x0018 -> CONTROL CHARACTER u'\x19' # 0x0019 -> CONTROL CHARACTER u'\x1a' # 0x001a -> CONTROL CHARACTER u'\x1b' # 0x001b -> CONTROL CHARACTER u'\x1c' # 0x001c -> CONTROL CHARACTER u'\x1d' # 0x001d -> CONTROL CHARACTER u'\x1e' # 0x001e -> CONTROL CHARACTER u'\x1f' # 0x001f -> CONTROL CHARACTER u' ' # 0x0020 -> SPACE, left-right u'!' # 0x0021 -> EXCLAMATION MARK, left-right u'"' # 0x0022 -> QUOTATION MARK, left-right u'#' # 0x0023 -> NUMBER SIGN, left-right u'$' # 0x0024 -> DOLLAR SIGN, left-right u'%' # 0x0025 -> PERCENT SIGN, left-right u'&' # 0x0026 -> AMPERSAND, left-right u"'" # 0x0027 -> APOSTROPHE, left-right u'(' # 0x0028 -> LEFT PARENTHESIS, left-right u')' # 0x0029 -> RIGHT PARENTHESIS, left-right u'*' # 0x002a -> ASTERISK, left-right u'+' # 0x002b -> PLUS SIGN, left-right u',' # 0x002c -> COMMA, left-right; in Arabic-script context, displayed as 0x066C ARABIC THOUSANDS SEPARATOR u'-' # 0x002d -> HYPHEN-MINUS, left-right u'.' # 0x002e -> FULL STOP, left-right; in Arabic-script context, displayed as 0x066B ARABIC DECIMAL SEPARATOR u'/' # 0x002f -> SOLIDUS, left-right u'0' # 0x0030 -> DIGIT ZERO; in Arabic-script context, displayed as 0x0660 ARABIC-INDIC DIGIT ZERO u'1' # 0x0031 -> DIGIT ONE; in Arabic-script context, displayed as 0x0661 ARABIC-INDIC DIGIT ONE u'2' # 0x0032 -> DIGIT TWO; in Arabic-script context, displayed as 0x0662 ARABIC-INDIC DIGIT TWO u'3' # 0x0033 -> DIGIT THREE; in Arabic-script context, displayed as 0x0663 ARABIC-INDIC DIGIT THREE u'4' # 0x0034 -> DIGIT FOUR; in Arabic-script context, displayed as 0x0664 ARABIC-INDIC DIGIT FOUR u'5' # 0x0035 -> DIGIT FIVE; in Arabic-script context, displayed as 0x0665 ARABIC-INDIC DIGIT FIVE u'6' # 0x0036 -> DIGIT SIX; in Arabic-script context, displayed as 0x0666 ARABIC-INDIC DIGIT SIX u'7' # 0x0037 -> DIGIT SEVEN; in Arabic-script context, displayed as 0x0667 ARABIC-INDIC DIGIT SEVEN u'8' # 0x0038 -> DIGIT EIGHT; in Arabic-script context, displayed as 0x0668 ARABIC-INDIC DIGIT EIGHT u'9' # 0x0039 -> DIGIT NINE; in Arabic-script context, displayed as 0x0669 ARABIC-INDIC DIGIT NINE u':' # 0x003a -> COLON, left-right u';' # 0x003b -> SEMICOLON, left-right u'<' # 0x003c -> LESS-THAN SIGN, left-right u'=' # 0x003d -> EQUALS SIGN, left-right u'>' # 0x003e -> GREATER-THAN SIGN, left-right u'?' # 0x003f -> QUESTION MARK, left-right u'@' # 0x0040 -> COMMERCIAL AT u'A' # 0x0041 -> LATIN CAPITAL LETTER A u'B' # 0x0042 -> LATIN CAPITAL LETTER B u'C' # 0x0043 -> LATIN CAPITAL LETTER C u'D' # 0x0044 -> LATIN CAPITAL LETTER D u'E' # 0x0045 -> LATIN CAPITAL LETTER E u'F' # 0x0046 -> LATIN CAPITAL LETTER F u'G' # 0x0047 -> LATIN CAPITAL LETTER G u'H' # 0x0048 -> LATIN CAPITAL LETTER H u'I' # 0x0049 -> LATIN CAPITAL LETTER I u'J' # 0x004a -> LATIN CAPITAL LETTER J u'K' # 0x004b -> LATIN CAPITAL LETTER K u'L' # 0x004c -> LATIN CAPITAL LETTER L u'M' # 0x004d -> LATIN CAPITAL LETTER M u'N' # 0x004e -> LATIN CAPITAL LETTER N u'O' # 0x004f -> LATIN CAPITAL LETTER O u'P' # 0x0050 -> LATIN CAPITAL LETTER P u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q u'R' # 0x0052 -> LATIN CAPITAL LETTER R u'S' # 0x0053 -> LATIN CAPITAL LETTER S u'T' # 0x0054 -> LATIN CAPITAL LETTER T u'U' # 0x0055 -> LATIN CAPITAL LETTER U u'V' # 0x0056 -> LATIN CAPITAL LETTER V u'W' # 0x0057 -> LATIN CAPITAL LETTER W u'X' # 0x0058 -> LATIN CAPITAL LETTER X u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y u'Z' # 0x005a -> LATIN CAPITAL LETTER Z u'[' # 0x005b -> LEFT SQUARE BRACKET, left-right u'\\' # 0x005c -> REVERSE SOLIDUS, left-right u']' # 0x005d -> RIGHT SQUARE BRACKET, left-right u'^' # 0x005e -> CIRCUMFLEX ACCENT, left-right u'_' # 0x005f -> LOW LINE, left-right u'`' # 0x0060 -> GRAVE ACCENT u'a' # 0x0061 -> LATIN SMALL LETTER A u'b' # 0x0062 -> LATIN SMALL LETTER B u'c' # 0x0063 -> LATIN SMALL LETTER C u'd' # 0x0064 -> LATIN SMALL LETTER D u'e' # 0x0065 -> LATIN SMALL LETTER E u'f' # 0x0066 -> LATIN SMALL LETTER F u'g' # 0x0067 -> LATIN SMALL LETTER G u'h' # 0x0068 -> LATIN SMALL LETTER H u'i' # 0x0069 -> LATIN SMALL LETTER I u'j' # 0x006a -> LATIN SMALL LETTER J u'k' # 0x006b -> LATIN SMALL LETTER K u'l' # 0x006c -> LATIN SMALL LETTER L u'm' # 0x006d -> LATIN SMALL LETTER M u'n' # 0x006e -> LATIN SMALL LETTER N u'o' # 0x006f -> LATIN SMALL LETTER O u'p' # 0x0070 -> LATIN SMALL LETTER P u'q' # 0x0071 -> LATIN SMALL LETTER Q u'r' # 0x0072 -> LATIN SMALL LETTER R u's' # 0x0073 -> LATIN SMALL LETTER S u't' # 0x0074 -> LATIN SMALL LETTER T u'u' # 0x0075 -> LATIN SMALL LETTER U u'v' # 0x0076 -> LATIN SMALL LETTER V u'w' # 0x0077 -> LATIN SMALL LETTER W u'x' # 0x0078 -> LATIN SMALL LETTER X u'y' # 0x0079 -> LATIN SMALL LETTER Y u'z' # 0x007a -> LATIN SMALL LETTER Z u'{' # 0x007b -> LEFT CURLY BRACKET, left-right u'|' # 0x007c -> VERTICAL LINE, left-right u'}' # 0x007d -> RIGHT CURLY BRACKET, left-right u'~' # 0x007e -> TILDE u'\x7f' # 0x007f -> CONTROL CHARACTER u'\xc4' # 0x0080 -> LATIN CAPITAL LETTER A WITH DIAERESIS u'\xa0' # 0x0081 -> NO-BREAK SPACE, right-left u'\xc7' # 0x0082 -> LATIN CAPITAL LETTER C WITH CEDILLA u'\xc9' # 0x0083 -> LATIN CAPITAL LETTER E WITH ACUTE u'\xd1' # 0x0084 -> LATIN CAPITAL LETTER N WITH TILDE u'\xd6' # 0x0085 -> LATIN CAPITAL LETTER O WITH DIAERESIS u'\xdc' # 0x0086 -> LATIN CAPITAL LETTER U WITH DIAERESIS u'\xe1' # 0x0087 -> LATIN SMALL LETTER A WITH ACUTE u'\xe0' # 0x0088 -> LATIN SMALL LETTER A WITH GRAVE u'\xe2' # 0x0089 -> LATIN SMALL LETTER A WITH CIRCUMFLEX u'\xe4' # 0x008a -> LATIN SMALL LETTER A WITH DIAERESIS u'\u06ba' # 0x008b -> ARABIC LETTER NOON GHUNNA u'\xab' # 0x008c -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left u'\xe7' # 0x008d -> LATIN SMALL LETTER C WITH CEDILLA u'\xe9' # 0x008e -> LATIN SMALL LETTER E WITH ACUTE u'\xe8' # 0x008f -> LATIN SMALL LETTER E WITH GRAVE u'\xea' # 0x0090 -> LATIN SMALL LETTER E WITH CIRCUMFLEX u'\xeb' # 0x0091 -> LATIN SMALL LETTER E WITH DIAERESIS u'\xed' # 0x0092 -> LATIN SMALL LETTER I WITH ACUTE u'\u2026' # 0x0093 -> HORIZONTAL ELLIPSIS, right-left u'\xee' # 0x0094 -> LATIN SMALL LETTER I WITH CIRCUMFLEX u'\xef' # 0x0095 -> LATIN SMALL LETTER I WITH DIAERESIS u'\xf1' # 0x0096 -> LATIN SMALL LETTER N WITH TILDE u'\xf3' # 0x0097 -> LATIN SMALL LETTER O WITH ACUTE u'\xbb' # 0x0098 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left u'\xf4' # 0x0099 -> LATIN SMALL LETTER O WITH CIRCUMFLEX u'\xf6' # 0x009a -> LATIN SMALL LETTER O WITH DIAERESIS u'\xf7' # 0x009b -> DIVISION SIGN, right-left u'\xfa' # 0x009c -> LATIN SMALL LETTER U WITH ACUTE u'\xf9' # 0x009d -> LATIN SMALL LETTER U WITH GRAVE u'\xfb' # 0x009e -> LATIN SMALL LETTER U WITH CIRCUMFLEX u'\xfc' # 0x009f -> LATIN SMALL LETTER U WITH DIAERESIS u' ' # 0x00a0 -> SPACE, right-left u'!' # 0x00a1 -> EXCLAMATION MARK, right-left u'"' # 0x00a2 -> QUOTATION MARK, right-left u'#' # 0x00a3 -> NUMBER SIGN, right-left u'$' # 0x00a4 -> DOLLAR SIGN, right-left u'\u066a' # 0x00a5 -> ARABIC PERCENT SIGN u'&' # 0x00a6 -> AMPERSAND, right-left u"'" # 0x00a7 -> APOSTROPHE, right-left u'(' # 0x00a8 -> LEFT PARENTHESIS, right-left u')' # 0x00a9 -> RIGHT PARENTHESIS, right-left u'*' # 0x00aa -> ASTERISK, right-left u'+' # 0x00ab -> PLUS SIGN, right-left u'\u060c' # 0x00ac -> ARABIC COMMA u'-' # 0x00ad -> HYPHEN-MINUS, right-left u'.' # 0x00ae -> FULL STOP, right-left u'/' # 0x00af -> SOLIDUS, right-left u'\u0660' # 0x00b0 -> ARABIC-INDIC DIGIT ZERO, right-left (need override) u'\u0661' # 0x00b1 -> ARABIC-INDIC DIGIT ONE, right-left (need override) u'\u0662' # 0x00b2 -> ARABIC-INDIC DIGIT TWO, right-left (need override) u'\u0663' # 0x00b3 -> ARABIC-INDIC DIGIT THREE, right-left (need override) u'\u0664' # 0x00b4 -> ARABIC-INDIC DIGIT FOUR, right-left (need override) u'\u0665' # 0x00b5 -> ARABIC-INDIC DIGIT FIVE, right-left (need override) u'\u0666' # 0x00b6 -> ARABIC-INDIC DIGIT SIX, right-left (need override) u'\u0667' # 0x00b7 -> ARABIC-INDIC DIGIT SEVEN, right-left (need override) u'\u0668' # 0x00b8 -> ARABIC-INDIC DIGIT EIGHT, right-left (need override) u'\u0669' # 0x00b9 -> ARABIC-INDIC DIGIT NINE, right-left (need override) u':' # 0x00ba -> COLON, right-left u'\u061b' # 0x00bb -> ARABIC SEMICOLON u'<' # 0x00bc -> LESS-THAN SIGN, right-left u'=' # 0x00bd -> EQUALS SIGN, right-left u'>' # 0x00be -> GREATER-THAN SIGN, right-left u'\u061f' # 0x00bf -> ARABIC QUESTION MARK u'\u274a' # 0x00c0 -> EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, right-left u'\u0621' # 0x00c1 -> ARABIC LETTER HAMZA u'\u0622' # 0x00c2 -> ARABIC LETTER ALEF WITH MADDA ABOVE u'\u0623' # 0x00c3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE u'\u0624' # 0x00c4 -> ARABIC LETTER WAW WITH HAMZA ABOVE u'\u0625' # 0x00c5 -> ARABIC LETTER ALEF WITH HAMZA BELOW u'\u0626' # 0x00c6 -> ARABIC LETTER YEH WITH HAMZA ABOVE u'\u0627' # 0x00c7 -> ARABIC LETTER ALEF u'\u0628' # 0x00c8 -> ARABIC LETTER BEH u'\u0629' # 0x00c9 -> ARABIC LETTER TEH MARBUTA u'\u062a' # 0x00ca -> ARABIC LETTER TEH u'\u062b' # 0x00cb -> ARABIC LETTER THEH u'\u062c' # 0x00cc -> ARABIC LETTER JEEM u'\u062d' # 0x00cd -> ARABIC LETTER HAH u'\u062e' # 0x00ce -> ARABIC LETTER KHAH u'\u062f' # 0x00cf -> ARABIC LETTER DAL u'\u0630' # 0x00d0 -> ARABIC LETTER THAL u'\u0631' # 0x00d1 -> ARABIC LETTER REH u'\u0632' # 0x00d2 -> ARABIC LETTER ZAIN u'\u0633' # 0x00d3 -> ARABIC LETTER SEEN u'\u0634' # 0x00d4 -> ARABIC LETTER SHEEN u'\u0635' # 0x00d5 -> ARABIC LETTER SAD u'\u0636' # 0x00d6 -> ARABIC LETTER DAD u'\u0637' # 0x00d7 -> ARABIC LETTER TAH u'\u0638' # 0x00d8 -> ARABIC LETTER ZAH u'\u0639' # 0x00d9 -> ARABIC LETTER AIN u'\u063a' # 0x00da -> ARABIC LETTER GHAIN u'[' # 0x00db -> LEFT SQUARE BRACKET, right-left u'\\' # 0x00dc -> REVERSE SOLIDUS, right-left u']' # 0x00dd -> RIGHT SQUARE BRACKET, right-left u'^' # 0x00de -> CIRCUMFLEX ACCENT, right-left u'_' # 0x00df -> LOW LINE, right-left u'\u0640' # 0x00e0 -> ARABIC TATWEEL u'\u0641' # 0x00e1 -> ARABIC LETTER FEH u'\u0642' # 0x00e2 -> ARABIC LETTER QAF u'\u0643' # 0x00e3 -> ARABIC LETTER KAF u'\u0644' # 0x00e4 -> ARABIC LETTER LAM u'\u0645' # 0x00e5 -> ARABIC LETTER MEEM u'\u0646' # 0x00e6 -> ARABIC LETTER NOON u'\u0647' # 0x00e7 -> ARABIC LETTER HEH u'\u0648' # 0x00e8 -> ARABIC LETTER WAW u'\u0649' # 0x00e9 -> ARABIC LETTER ALEF MAKSURA u'\u064a' # 0x00ea -> ARABIC LETTER YEH u'\u064b' # 0x00eb -> ARABIC FATHATAN u'\u064c' # 0x00ec -> ARABIC DAMMATAN u'\u064d' # 0x00ed -> ARABIC KASRATAN u'\u064e' # 0x00ee -> ARABIC FATHA u'\u064f' # 0x00ef -> ARABIC DAMMA u'\u0650' # 0x00f0 -> ARABIC KASRA u'\u0651' # 0x00f1 -> ARABIC SHADDA u'\u0652' # 0x00f2 -> ARABIC SUKUN u'\u067e' # 0x00f3 -> ARABIC LETTER PEH u'\u0679' # 0x00f4 -> ARABIC LETTER TTEH u'\u0686' # 0x00f5 -> ARABIC LETTER TCHEH u'\u06d5' # 0x00f6 -> ARABIC LETTER AE u'\u06a4' # 0x00f7 -> ARABIC LETTER VEH u'\u06af' # 0x00f8 -> ARABIC LETTER GAF u'\u0688' # 0x00f9 -> ARABIC LETTER DDAL u'\u0691' # 0x00fa -> ARABIC LETTER RREH u'{' # 0x00fb -> LEFT CURLY BRACKET, right-left u'|' # 0x00fc -> VERTICAL LINE, right-left u'}' # 0x00fd -> RIGHT CURLY BRACKET, right-left u'\u0698' # 0x00fe -> ARABIC LETTER JEH u'\u06d2' # 0x00ff -> ARABIC LETTER YEH BARREE ) ### Encoding Map encoding_map = { 0x0000: 0x0000, # CONTROL CHARACTER 0x0001: 0x0001, # CONTROL CHARACTER 0x0002: 0x0002, # CONTROL CHARACTER 0x0003: 0x0003, # CONTROL CHARACTER 0x0004: 0x0004, # CONTROL CHARACTER 0x0005: 0x0005, # CONTROL CHARACTER 0x0006: 0x0006, # CONTROL CHARACTER 0x0007: 0x0007, # CONTROL CHARACTER 0x0008: 0x0008, # CONTROL CHARACTER 0x0009: 0x0009, # CONTROL CHARACTER 0x000a: 0x000a, # CONTROL CHARACTER 0x000b: 0x000b, # CONTROL CHARACTER 0x000c: 0x000c, # CONTROL CHARACTER 0x000d: 0x000d, # CONTROL CHARACTER 0x000e: 0x000e, # CONTROL CHARACTER 0x000f: 0x000f, # CONTROL CHARACTER 0x0010: 0x0010, # CONTROL CHARACTER 0x0011: 0x0011, # CONTROL CHARACTER 0x0012: 0x0012, # CONTROL CHARACTER 0x0013: 0x0013, # CONTROL CHARACTER 0x0014: 0x0014, # CONTROL CHARACTER 0x0015: 0x0015, # CONTROL CHARACTER 0x0016: 0x0016, # CONTROL CHARACTER 0x0017: 0x0017, # CONTROL CHARACTER 0x0018: 0x0018, # CONTROL CHARACTER 0x0019: 0x0019, # CONTROL CHARACTER 0x001a: 0x001a, # CONTROL CHARACTER 0x001b: 0x001b, # CONTROL CHARACTER 0x001c: 0x001c, # CONTROL CHARACTER 0x001d: 0x001d, # CONTROL CHARACTER 0x001e: 0x001e, # CONTROL CHARACTER 0x001f: 0x001f, # CONTROL CHARACTER 0x0020: 0x0020, # SPACE, left-right 0x0020: 0x00a0, # SPACE, right-left 0x0021: 0x0021, # EXCLAMATION MARK, left-right 0x0021: 0x00a1, # EXCLAMATION MARK, right-left 0x0022: 0x0022, # QUOTATION MARK, left-right 0x0022: 0x00a2, # QUOTATION MARK, right-left 0x0023: 0x0023, # NUMBER SIGN, left-right 0x0023: 0x00a3, # NUMBER SIGN, right-left 0x0024: 0x0024, # DOLLAR SIGN, left-right 0x0024: 0x00a4, # DOLLAR SIGN, right-left 0x0025: 0x0025, # PERCENT SIGN, left-right 0x0026: 0x0026, # AMPERSAND, left-right 0x0026: 0x00a6, # AMPERSAND, right-left 0x0027: 0x0027, # APOSTROPHE, left-right 0x0027: 0x00a7, # APOSTROPHE, right-left 0x0028: 0x0028, # LEFT PARENTHESIS, left-right 0x0028: 0x00a8, # LEFT PARENTHESIS, right-left 0x0029: 0x0029, # RIGHT PARENTHESIS, left-right 0x0029: 0x00a9, # RIGHT PARENTHESIS, right-left 0x002a: 0x002a, # ASTERISK, left-right 0x002a: 0x00aa, # ASTERISK, right-left 0x002b: 0x002b, # PLUS SIGN, left-right 0x002b: 0x00ab, # PLUS SIGN, right-left 0x002c: 0x002c, # COMMA, left-right; in Arabic-script context, displayed as 0x066C ARABIC THOUSANDS SEPARATOR 0x002d: 0x002d, # HYPHEN-MINUS, left-right 0x002d: 0x00ad, # HYPHEN-MINUS, right-left 0x002e: 0x002e, # FULL STOP, left-right; in Arabic-script context, displayed as 0x066B ARABIC DECIMAL SEPARATOR 0x002e: 0x00ae, # FULL STOP, right-left 0x002f: 0x002f, # SOLIDUS, left-right 0x002f: 0x00af, # SOLIDUS, right-left 0x0030: 0x0030, # DIGIT ZERO; in Arabic-script context, displayed as 0x0660 ARABIC-INDIC DIGIT ZERO 0x0031: 0x0031, # DIGIT ONE; in Arabic-script context, displayed as 0x0661 ARABIC-INDIC DIGIT ONE 0x0032: 0x0032, # DIGIT TWO; in Arabic-script context, displayed as 0x0662 ARABIC-INDIC DIGIT TWO 0x0033: 0x0033, # DIGIT THREE; in Arabic-script context, displayed as 0x0663 ARABIC-INDIC DIGIT THREE 0x0034: 0x0034, # DIGIT FOUR; in Arabic-script context, displayed as 0x0664 ARABIC-INDIC DIGIT FOUR 0x0035: 0x0035, # DIGIT FIVE; in Arabic-script context, displayed as 0x0665 ARABIC-INDIC DIGIT FIVE 0x0036: 0x0036, # DIGIT SIX; in Arabic-script context, displayed as 0x0666 ARABIC-INDIC DIGIT SIX 0x0037: 0x0037, # DIGIT SEVEN; in Arabic-script context, displayed as 0x0667 ARABIC-INDIC DIGIT SEVEN 0x0038: 0x0038, # DIGIT EIGHT; in Arabic-script context, displayed as 0x0668 ARABIC-INDIC DIGIT EIGHT 0x0039: 0x0039, # DIGIT NINE; in Arabic-script context, displayed as 0x0669 ARABIC-INDIC DIGIT NINE 0x003a: 0x003a, # COLON, left-right 0x003a: 0x00ba, # COLON, right-left 0x003b: 0x003b, # SEMICOLON, left-right 0x003c: 0x003c, # LESS-THAN SIGN, left-right 0x003c: 0x00bc, # LESS-THAN SIGN, right-left 0x003d: 0x003d, # EQUALS SIGN, left-right 0x003d: 0x00bd, # EQUALS SIGN, right-left 0x003e: 0x003e, # GREATER-THAN SIGN, left-right 0x003e: 0x00be, # GREATER-THAN SIGN, right-left 0x003f: 0x003f, # QUESTION MARK, left-right 0x0040: 0x0040, # COMMERCIAL AT 0x0041: 0x0041, # LATIN CAPITAL LETTER A 0x0042: 0x0042, # LATIN CAPITAL LETTER B 0x0043: 0x0043, # LATIN CAPITAL LETTER C 0x0044: 0x0044, # LATIN CAPITAL LETTER D 0x0045: 0x0045, # LATIN CAPITAL LETTER E 0x0046: 0x0046, # LATIN CAPITAL LETTER F 0x0047: 0x0047, # LATIN CAPITAL LETTER G 0x0048: 0x0048, # LATIN CAPITAL LETTER H 0x0049: 0x0049, # LATIN CAPITAL LETTER I 0x004a: 0x004a, # LATIN CAPITAL LETTER J 0x004b: 0x004b, # LATIN CAPITAL LETTER K 0x004c: 0x004c, # LATIN CAPITAL LETTER L 0x004d: 0x004d, # LATIN CAPITAL LETTER M 0x004e: 0x004e, # LATIN CAPITAL LETTER N 0x004f: 0x004f, # LATIN CAPITAL LETTER O 0x0050: 0x0050, # LATIN CAPITAL LETTER P 0x0051: 0x0051, # LATIN CAPITAL LETTER Q 0x0052: 0x0052, # LATIN CAPITAL LETTER R 0x0053: 0x0053, # LATIN CAPITAL LETTER S 0x0054: 0x0054, # LATIN CAPITAL LETTER T 0x0055: 0x0055, # LATIN CAPITAL LETTER U 0x0056: 0x0056, # LATIN CAPITAL LETTER V 0x0057: 0x0057, # LATIN CAPITAL LETTER W 0x0058: 0x0058, # LATIN CAPITAL LETTER X 0x0059: 0x0059, # LATIN CAPITAL LETTER Y 0x005a: 0x005a, # LATIN CAPITAL LETTER Z 0x005b: 0x005b, # LEFT SQUARE BRACKET, left-right 0x005b: 0x00db, # LEFT SQUARE BRACKET, right-left 0x005c: 0x005c, # REVERSE SOLIDUS, left-right 0x005c: 0x00dc, # REVERSE SOLIDUS, right-left 0x005d: 0x005d, # RIGHT SQUARE BRACKET, left-right 0x005d: 0x00dd, # RIGHT SQUARE BRACKET, right-left 0x005e: 0x005e, # CIRCUMFLEX ACCENT, left-right 0x005e: 0x00de, # CIRCUMFLEX ACCENT, right-left 0x005f: 0x005f, # LOW LINE, left-right 0x005f: 0x00df, # LOW LINE, right-left 0x0060: 0x0060, # GRAVE ACCENT 0x0061: 0x0061, # LATIN SMALL LETTER A 0x0062: 0x0062, # LATIN SMALL LETTER B 0x0063: 0x0063, # LATIN SMALL LETTER C 0x0064: 0x0064, # LATIN SMALL LETTER D 0x0065: 0x0065, # LATIN SMALL LETTER E 0x0066: 0x0066, # LATIN SMALL LETTER F 0x0067: 0x0067, # LATIN SMALL LETTER G 0x0068: 0x0068, # LATIN SMALL LETTER H 0x0069: 0x0069, # LATIN SMALL LETTER I 0x006a: 0x006a, # LATIN SMALL LETTER J 0x006b: 0x006b, # LATIN SMALL LETTER K 0x006c: 0x006c, # LATIN SMALL LETTER L 0x006d: 0x006d, # LATIN SMALL LETTER M 0x006e: 0x006e, # LATIN SMALL LETTER N 0x006f: 0x006f, # LATIN SMALL LETTER O 0x0070: 0x0070, # LATIN SMALL LETTER P 0x0071: 0x0071, # LATIN SMALL LETTER Q 0x0072: 0x0072, # LATIN SMALL LETTER R 0x0073: 0x0073, # LATIN SMALL LETTER S 0x0074: 0x0074, # LATIN SMALL LETTER T 0x0075: 0x0075, # LATIN SMALL LETTER U 0x0076: 0x0076, # LATIN SMALL LETTER V 0x0077: 0x0077, # LATIN SMALL LETTER W 0x0078: 0x0078, # LATIN SMALL LETTER X 0x0079: 0x0079, # LATIN SMALL LETTER Y 0x007a: 0x007a, # LATIN SMALL LETTER Z 0x007b: 0x007b, # LEFT CURLY BRACKET, left-right 0x007b: 0x00fb, # LEFT CURLY BRACKET, right-left 0x007c: 0x007c, # VERTICAL LINE, left-right 0x007c: 0x00fc, # VERTICAL LINE, right-left 0x007d: 0x007d, # RIGHT CURLY BRACKET, left-right 0x007d: 0x00fd, # RIGHT CURLY BRACKET, right-left 0x007e: 0x007e, # TILDE 0x007f: 0x007f, # CONTROL CHARACTER 0x00a0: 0x0081, # NO-BREAK SPACE, right-left 0x00ab: 0x008c, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left 0x00bb: 0x0098, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left 0x00c4: 0x0080, # LATIN CAPITAL LETTER A WITH DIAERESIS 0x00c7: 0x0082, # LATIN CAPITAL LETTER C WITH CEDILLA 0x00c9: 0x0083, # LATIN CAPITAL LETTER E WITH ACUTE 0x00d1: 0x0084, # LATIN CAPITAL LETTER N WITH TILDE 0x00d6: 0x0085, # LATIN CAPITAL LETTER O WITH DIAERESIS 0x00dc: 0x0086, # LATIN CAPITAL LETTER U WITH DIAERESIS 0x00e0: 0x0088, # LATIN SMALL LETTER A WITH GRAVE 0x00e1: 0x0087, # LATIN SMALL LETTER A WITH ACUTE 0x00e2: 0x0089, # LATIN SMALL LETTER A WITH CIRCUMFLEX 0x00e4: 0x008a, # LATIN SMALL LETTER A WITH DIAERESIS 0x00e7: 0x008d, # LATIN SMALL LETTER C WITH CEDILLA 0x00e8: 0x008f, # LATIN SMALL LETTER E WITH GRAVE 0x00e9: 0x008e, # LATIN SMALL LETTER E WITH ACUTE 0x00ea: 0x0090, # LATIN SMALL LETTER E WITH CIRCUMFLEX 0x00eb: 0x0091, # LATIN SMALL LETTER E WITH DIAERESIS 0x00ed: 0x0092, # LATIN SMALL LETTER I WITH ACUTE 0x00ee: 0x0094, # LATIN SMALL LETTER I WITH CIRCUMFLEX 0x00ef: 0x0095, # LATIN SMALL LETTER I WITH DIAERESIS 0x00f1: 0x0096, # LATIN SMALL LETTER N WITH TILDE 0x00f3: 0x0097, # LATIN SMALL LETTER O WITH ACUTE 0x00f4: 0x0099, # LATIN SMALL LETTER O WITH CIRCUMFLEX 0x00f6: 0x009a, # LATIN SMALL LETTER O WITH DIAERESIS 0x00f7: 0x009b, # DIVISION SIGN, right-left 0x00f9: 0x009d, # LATIN SMALL LETTER U WITH GRAVE 0x00fa: 0x009c, # LATIN SMALL LETTER U WITH ACUTE 0x00fb: 0x009e, # LATIN SMALL LETTER U WITH CIRCUMFLEX 0x00fc: 0x009f, # LATIN SMALL LETTER U WITH DIAERESIS 0x060c: 0x00ac, # ARABIC COMMA 0x061b: 0x00bb, # ARABIC SEMICOLON 0x061f: 0x00bf, # ARABIC QUESTION MARK 0x0621: 0x00c1, # ARABIC LETTER HAMZA 0x0622: 0x00c2, # ARABIC LETTER ALEF WITH MADDA ABOVE 0x0623: 0x00c3, # ARABIC LETTER ALEF WITH HAMZA ABOVE 0x0624: 0x00c4, # ARABIC LETTER WAW WITH HAMZA ABOVE 0x0625: 0x00c5, # ARABIC LETTER ALEF WITH HAMZA BELOW 0x0626: 0x00c6, # ARABIC LETTER YEH WITH HAMZA ABOVE 0x0627: 0x00c7, # ARABIC LETTER ALEF 0x0628: 0x00c8, # ARABIC LETTER BEH 0x0629: 0x00c9, # ARABIC LETTER TEH MARBUTA 0x062a: 0x00ca, # ARABIC LETTER TEH 0x062b: 0x00cb, # ARABIC LETTER THEH 0x062c: 0x00cc, # ARABIC LETTER JEEM 0x062d: 0x00cd, # ARABIC LETTER HAH 0x062e: 0x00ce, # ARABIC LETTER KHAH 0x062f: 0x00cf, # ARABIC LETTER DAL 0x0630: 0x00d0, # ARABIC LETTER THAL 0x0631: 0x00d1, # ARABIC LETTER REH 0x0632: 0x00d2, # ARABIC LETTER ZAIN 0x0633: 0x00d3, # ARABIC LETTER SEEN 0x0634: 0x00d4, # ARABIC LETTER SHEEN 0x0635: 0x00d5, # ARABIC LETTER SAD 0x0636: 0x00d6, # ARABIC LETTER DAD 0x0637: 0x00d7, # ARABIC LETTER TAH 0x0638: 0x00d8, # ARABIC LETTER ZAH 0x0639: 0x00d9, # ARABIC LETTER AIN 0x063a: 0x00da, # ARABIC LETTER GHAIN 0x0640: 0x00e0, # ARABIC TATWEEL 0x0641: 0x00e1, # ARABIC LETTER FEH 0x0642: 0x00e2, # ARABIC LETTER QAF 0x0643: 0x00e3, # ARABIC LETTER KAF 0x0644: 0x00e4, # ARABIC LETTER LAM 0x0645: 0x00e5, # ARABIC LETTER MEEM 0x0646: 0x00e6, # ARABIC LETTER NOON 0x0647: 0x00e7, # ARABIC LETTER HEH 0x0648: 0x00e8, # ARABIC LETTER WAW 0x0649: 0x00e9, # ARABIC LETTER ALEF MAKSURA 0x064a: 0x00ea, # ARABIC LETTER YEH 0x064b: 0x00eb, # ARABIC FATHATAN 0x064c: 0x00ec, # ARABIC DAMMATAN 0x064d: 0x00ed, # ARABIC KASRATAN 0x064e: 0x00ee, # ARABIC FATHA 0x064f: 0x00ef, # ARABIC DAMMA 0x0650: 0x00f0, # ARABIC KASRA 0x0651: 0x00f1, # ARABIC SHADDA 0x0652: 0x00f2, # ARABIC SUKUN 0x0660: 0x00b0, # ARABIC-INDIC DIGIT ZERO, right-left (need override) 0x0661: 0x00b1, # ARABIC-INDIC DIGIT ONE, right-left (need override) 0x0662: 0x00b2, # ARABIC-INDIC DIGIT TWO, right-left (need override) 0x0663: 0x00b3, # ARABIC-INDIC DIGIT THREE, right-left (need override) 0x0664: 0x00b4, # ARABIC-INDIC DIGIT FOUR, right-left (need override) 0x0665: 0x00b5, # ARABIC-INDIC DIGIT FIVE, right-left (need override) 0x0666: 0x00b6, # ARABIC-INDIC DIGIT SIX, right-left (need override) 0x0667: 0x00b7, # ARABIC-INDIC DIGIT SEVEN, right-left (need override) 0x0668: 0x00b8, # ARABIC-INDIC DIGIT EIGHT, right-left (need override) 0x0669: 0x00b9, # ARABIC-INDIC DIGIT NINE, right-left (need override) 0x066a: 0x00a5, # ARABIC PERCENT SIGN 0x0679: 0x00f4, # ARABIC LETTER TTEH 0x067e: 0x00f3, # ARABIC LETTER PEH 0x0686: 0x00f5, # ARABIC LETTER TCHEH 0x0688: 0x00f9, # ARABIC LETTER DDAL 0x0691: 0x00fa, # ARABIC LETTER RREH 0x0698: 0x00fe, # ARABIC LETTER JEH 0x06a4: 0x00f7, # ARABIC LETTER VEH 0x06af: 0x00f8, # ARABIC LETTER GAF 0x06ba: 0x008b, # ARABIC LETTER NOON GHUNNA 0x06d2: 0x00ff, # ARABIC LETTER YEH BARREE 0x06d5: 0x00f6, # ARABIC LETTER AE 0x2026: 0x0093, # HORIZONTAL ELLIPSIS, right-left 0x274a: 0x00c0, # EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, right-left } cp864.py 0000644 00000102177 15053241622 0005772 0 ustar 00 """ Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP864.TXT' with gencodec.py. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.charmap_encode(input,self.errors,encoding_map)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.charmap_decode(input,self.errors,decoding_table)[0] class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='cp864', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0025: 0x066a, # ARABIC PERCENT SIGN 0x0080: 0x00b0, # DEGREE SIGN 0x0081: 0x00b7, # MIDDLE DOT 0x0082: 0x2219, # BULLET OPERATOR 0x0083: 0x221a, # SQUARE ROOT 0x0084: 0x2592, # MEDIUM SHADE 0x0085: 0x2500, # FORMS LIGHT HORIZONTAL 0x0086: 0x2502, # FORMS LIGHT VERTICAL 0x0087: 0x253c, # FORMS LIGHT VERTICAL AND HORIZONTAL 0x0088: 0x2524, # FORMS LIGHT VERTICAL AND LEFT 0x0089: 0x252c, # FORMS LIGHT DOWN AND HORIZONTAL 0x008a: 0x251c, # FORMS LIGHT VERTICAL AND RIGHT 0x008b: 0x2534, # FORMS LIGHT UP AND HORIZONTAL 0x008c: 0x2510, # FORMS LIGHT DOWN AND LEFT 0x008d: 0x250c, # FORMS LIGHT DOWN AND RIGHT 0x008e: 0x2514, # FORMS LIGHT UP AND RIGHT 0x008f: 0x2518, # FORMS LIGHT UP AND LEFT 0x0090: 0x03b2, # GREEK SMALL BETA 0x0091: 0x221e, # INFINITY 0x0092: 0x03c6, # GREEK SMALL PHI 0x0093: 0x00b1, # PLUS-OR-MINUS SIGN 0x0094: 0x00bd, # FRACTION 1/2 0x0095: 0x00bc, # FRACTION 1/4 0x0096: 0x2248, # ALMOST EQUAL TO 0x0097: 0x00ab, # LEFT POINTING GUILLEMET 0x0098: 0x00bb, # RIGHT POINTING GUILLEMET 0x0099: 0xfef7, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM 0x009a: 0xfef8, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM 0x009b: None, # UNDEFINED 0x009c: None, # UNDEFINED 0x009d: 0xfefb, # ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM 0x009e: 0xfefc, # ARABIC LIGATURE LAM WITH ALEF FINAL FORM 0x009f: None, # UNDEFINED 0x00a1: 0x00ad, # SOFT HYPHEN 0x00a2: 0xfe82, # ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM 0x00a5: 0xfe84, # ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM 0x00a6: None, # UNDEFINED 0x00a7: None, # UNDEFINED 0x00a8: 0xfe8e, # ARABIC LETTER ALEF FINAL FORM 0x00a9: 0xfe8f, # ARABIC LETTER BEH ISOLATED FORM 0x00aa: 0xfe95, # ARABIC LETTER TEH ISOLATED FORM 0x00ab: 0xfe99, # ARABIC LETTER THEH ISOLATED FORM 0x00ac: 0x060c, # ARABIC COMMA 0x00ad: 0xfe9d, # ARABIC LETTER JEEM ISOLATED FORM 0x00ae: 0xfea1, # ARABIC LETTER HAH ISOLATED FORM 0x00af: 0xfea5, # ARABIC LETTER KHAH ISOLATED FORM 0x00b0: 0x0660, # ARABIC-INDIC DIGIT ZERO 0x00b1: 0x0661, # ARABIC-INDIC DIGIT ONE 0x00b2: 0x0662, # ARABIC-INDIC DIGIT TWO 0x00b3: 0x0663, # ARABIC-INDIC DIGIT THREE 0x00b4: 0x0664, # ARABIC-INDIC DIGIT FOUR 0x00b5: 0x0665, # ARABIC-INDIC DIGIT FIVE 0x00b6: 0x0666, # ARABIC-INDIC DIGIT SIX 0x00b7: 0x0667, # ARABIC-INDIC DIGIT SEVEN 0x00b8: 0x0668, # ARABIC-INDIC DIGIT EIGHT 0x00b9: 0x0669, # ARABIC-INDIC DIGIT NINE 0x00ba: 0xfed1, # ARABIC LETTER FEH ISOLATED FORM 0x00bb: 0x061b, # ARABIC SEMICOLON 0x00bc: 0xfeb1, # ARABIC LETTER SEEN ISOLATED FORM 0x00bd: 0xfeb5, # ARABIC LETTER SHEEN ISOLATED FORM 0x00be: 0xfeb9, # ARABIC LETTER SAD ISOLATED FORM 0x00bf: 0x061f, # ARABIC QUESTION MARK 0x00c0: 0x00a2, # CENT SIGN 0x00c1: 0xfe80, # ARABIC LETTER HAMZA ISOLATED FORM 0x00c2: 0xfe81, # ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM 0x00c3: 0xfe83, # ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM 0x00c4: 0xfe85, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM 0x00c5: 0xfeca, # ARABIC LETTER AIN FINAL FORM 0x00c6: 0xfe8b, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM 0x00c7: 0xfe8d, # ARABIC LETTER ALEF ISOLATED FORM 0x00c8: 0xfe91, # ARABIC LETTER BEH INITIAL FORM 0x00c9: 0xfe93, # ARABIC LETTER TEH MARBUTA ISOLATED FORM 0x00ca: 0xfe97, # ARABIC LETTER TEH INITIAL FORM 0x00cb: 0xfe9b, # ARABIC LETTER THEH INITIAL FORM 0x00cc: 0xfe9f, # ARABIC LETTER JEEM INITIAL FORM 0x00cd: 0xfea3, # ARABIC LETTER HAH INITIAL FORM 0x00ce: 0xfea7, # ARABIC LETTER KHAH INITIAL FORM 0x00cf: 0xfea9, # ARABIC LETTER DAL ISOLATED FORM 0x00d0: 0xfeab, # ARABIC LETTER THAL ISOLATED FORM 0x00d1: 0xfead, # ARABIC LETTER REH ISOLATED FORM 0x00d2: 0xfeaf, # ARABIC LETTER ZAIN ISOLATED FORM 0x00d3: 0xfeb3, # ARABIC LETTER SEEN INITIAL FORM 0x00d4: 0xfeb7, # ARABIC LETTER SHEEN INITIAL FORM 0x00d5: 0xfebb, # ARABIC LETTER SAD INITIAL FORM 0x00d6: 0xfebf, # ARABIC LETTER DAD INITIAL FORM 0x00d7: 0xfec1, # ARABIC LETTER TAH ISOLATED FORM 0x00d8: 0xfec5, # ARABIC LETTER ZAH ISOLATED FORM 0x00d9: 0xfecb, # ARABIC LETTER AIN INITIAL FORM 0x00da: 0xfecf, # ARABIC LETTER GHAIN INITIAL FORM 0x00db: 0x00a6, # BROKEN VERTICAL BAR 0x00dc: 0x00ac, # NOT SIGN 0x00dd: 0x00f7, # DIVISION SIGN 0x00de: 0x00d7, # MULTIPLICATION SIGN 0x00df: 0xfec9, # ARABIC LETTER AIN ISOLATED FORM 0x00e0: 0x0640, # ARABIC TATWEEL 0x00e1: 0xfed3, # ARABIC LETTER FEH INITIAL FORM 0x00e2: 0xfed7, # ARABIC LETTER QAF INITIAL FORM 0x00e3: 0xfedb, # ARABIC LETTER KAF INITIAL FORM 0x00e4: 0xfedf, # ARABIC LETTER LAM INITIAL FORM 0x00e5: 0xfee3, # ARABIC LETTER MEEM INITIAL FORM 0x00e6: 0xfee7, # ARABIC LETTER NOON INITIAL FORM 0x00e7: 0xfeeb, # ARABIC LETTER HEH INITIAL FORM 0x00e8: 0xfeed, # ARABIC LETTER WAW ISOLATED FORM 0x00e9: 0xfeef, # ARABIC LETTER ALEF MAKSURA ISOLATED FORM 0x00ea: 0xfef3, # ARABIC LETTER YEH INITIAL FORM 0x00eb: 0xfebd, # ARABIC LETTER DAD ISOLATED FORM 0x00ec: 0xfecc, # ARABIC LETTER AIN MEDIAL FORM 0x00ed: 0xfece, # ARABIC LETTER GHAIN FINAL FORM 0x00ee: 0xfecd, # ARABIC LETTER GHAIN ISOLATED FORM 0x00ef: 0xfee1, # ARABIC LETTER MEEM ISOLATED FORM 0x00f0: 0xfe7d, # ARABIC SHADDA MEDIAL FORM 0x00f1: 0x0651, # ARABIC SHADDAH 0x00f2: 0xfee5, # ARABIC LETTER NOON ISOLATED FORM 0x00f3: 0xfee9, # ARABIC LETTER HEH ISOLATED FORM 0x00f4: 0xfeec, # ARABIC LETTER HEH MEDIAL FORM 0x00f5: 0xfef0, # ARABIC LETTER ALEF MAKSURA FINAL FORM 0x00f6: 0xfef2, # ARABIC LETTER YEH FINAL FORM 0x00f7: 0xfed0, # ARABIC LETTER GHAIN MEDIAL FORM 0x00f8: 0xfed5, # ARABIC LETTER QAF ISOLATED FORM 0x00f9: 0xfef5, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM 0x00fa: 0xfef6, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM 0x00fb: 0xfedd, # ARABIC LETTER LAM ISOLATED FORM 0x00fc: 0xfed9, # ARABIC LETTER KAF ISOLATED FORM 0x00fd: 0xfef1, # ARABIC LETTER YEH ISOLATED FORM 0x00fe: 0x25a0, # BLACK SQUARE 0x00ff: None, # UNDEFINED }) ### Decoding Table decoding_table = ( u'\x00' # 0x0000 -> NULL u'\x01' # 0x0001 -> START OF HEADING u'\x02' # 0x0002 -> START OF TEXT u'\x03' # 0x0003 -> END OF TEXT u'\x04' # 0x0004 -> END OF TRANSMISSION u'\x05' # 0x0005 -> ENQUIRY u'\x06' # 0x0006 -> ACKNOWLEDGE u'\x07' # 0x0007 -> BELL u'\x08' # 0x0008 -> BACKSPACE u'\t' # 0x0009 -> HORIZONTAL TABULATION u'\n' # 0x000a -> LINE FEED u'\x0b' # 0x000b -> VERTICAL TABULATION u'\x0c' # 0x000c -> FORM FEED u'\r' # 0x000d -> CARRIAGE RETURN u'\x0e' # 0x000e -> SHIFT OUT u'\x0f' # 0x000f -> SHIFT IN u'\x10' # 0x0010 -> DATA LINK ESCAPE u'\x11' # 0x0011 -> DEVICE CONTROL ONE u'\x12' # 0x0012 -> DEVICE CONTROL TWO u'\x13' # 0x0013 -> DEVICE CONTROL THREE u'\x14' # 0x0014 -> DEVICE CONTROL FOUR u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE u'\x16' # 0x0016 -> SYNCHRONOUS IDLE u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK u'\x18' # 0x0018 -> CANCEL u'\x19' # 0x0019 -> END OF MEDIUM u'\x1a' # 0x001a -> SUBSTITUTE u'\x1b' # 0x001b -> ESCAPE u'\x1c' # 0x001c -> FILE SEPARATOR u'\x1d' # 0x001d -> GROUP SEPARATOR u'\x1e' # 0x001e -> RECORD SEPARATOR u'\x1f' # 0x001f -> UNIT SEPARATOR u' ' # 0x0020 -> SPACE u'!' # 0x0021 -> EXCLAMATION MARK u'"' # 0x0022 -> QUOTATION MARK u'#' # 0x0023 -> NUMBER SIGN u'$' # 0x0024 -> DOLLAR SIGN u'\u066a' # 0x0025 -> ARABIC PERCENT SIGN u'&' # 0x0026 -> AMPERSAND u"'" # 0x0027 -> APOSTROPHE u'(' # 0x0028 -> LEFT PARENTHESIS u')' # 0x0029 -> RIGHT PARENTHESIS u'*' # 0x002a -> ASTERISK u'+' # 0x002b -> PLUS SIGN u',' # 0x002c -> COMMA u'-' # 0x002d -> HYPHEN-MINUS u'.' # 0x002e -> FULL STOP u'/' # 0x002f -> SOLIDUS u'0' # 0x0030 -> DIGIT ZERO u'1' # 0x0031 -> DIGIT ONE u'2' # 0x0032 -> DIGIT TWO u'3' # 0x0033 -> DIGIT THREE u'4' # 0x0034 -> DIGIT FOUR u'5' # 0x0035 -> DIGIT FIVE u'6' # 0x0036 -> DIGIT SIX u'7' # 0x0037 -> DIGIT SEVEN u'8' # 0x0038 -> DIGIT EIGHT u'9' # 0x0039 -> DIGIT NINE u':' # 0x003a -> COLON u';' # 0x003b -> SEMICOLON u'<' # 0x003c -> LESS-THAN SIGN u'=' # 0x003d -> EQUALS SIGN u'>' # 0x003e -> GREATER-THAN SIGN u'?' # 0x003f -> QUESTION MARK u'@' # 0x0040 -> COMMERCIAL AT u'A' # 0x0041 -> LATIN CAPITAL LETTER A u'B' # 0x0042 -> LATIN CAPITAL LETTER B u'C' # 0x0043 -> LATIN CAPITAL LETTER C u'D' # 0x0044 -> LATIN CAPITAL LETTER D u'E' # 0x0045 -> LATIN CAPITAL LETTER E u'F' # 0x0046 -> LATIN CAPITAL LETTER F u'G' # 0x0047 -> LATIN CAPITAL LETTER G u'H' # 0x0048 -> LATIN CAPITAL LETTER H u'I' # 0x0049 -> LATIN CAPITAL LETTER I u'J' # 0x004a -> LATIN CAPITAL LETTER J u'K' # 0x004b -> LATIN CAPITAL LETTER K u'L' # 0x004c -> LATIN CAPITAL LETTER L u'M' # 0x004d -> LATIN CAPITAL LETTER M u'N' # 0x004e -> LATIN CAPITAL LETTER N u'O' # 0x004f -> LATIN CAPITAL LETTER O u'P' # 0x0050 -> LATIN CAPITAL LETTER P u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q u'R' # 0x0052 -> LATIN CAPITAL LETTER R u'S' # 0x0053 -> LATIN CAPITAL LETTER S u'T' # 0x0054 -> LATIN CAPITAL LETTER T u'U' # 0x0055 -> LATIN CAPITAL LETTER U u'V' # 0x0056 -> LATIN CAPITAL LETTER V u'W' # 0x0057 -> LATIN CAPITAL LETTER W u'X' # 0x0058 -> LATIN CAPITAL LETTER X u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y u'Z' # 0x005a -> LATIN CAPITAL LETTER Z u'[' # 0x005b -> LEFT SQUARE BRACKET u'\\' # 0x005c -> REVERSE SOLIDUS u']' # 0x005d -> RIGHT SQUARE BRACKET u'^' # 0x005e -> CIRCUMFLEX ACCENT u'_' # 0x005f -> LOW LINE u'`' # 0x0060 -> GRAVE ACCENT u'a' # 0x0061 -> LATIN SMALL LETTER A u'b' # 0x0062 -> LATIN SMALL LETTER B u'c' # 0x0063 -> LATIN SMALL LETTER C u'd' # 0x0064 -> LATIN SMALL LETTER D u'e' # 0x0065 -> LATIN SMALL LETTER E u'f' # 0x0066 -> LATIN SMALL LETTER F u'g' # 0x0067 -> LATIN SMALL LETTER G u'h' # 0x0068 -> LATIN SMALL LETTER H u'i' # 0x0069 -> LATIN SMALL LETTER I u'j' # 0x006a -> LATIN SMALL LETTER J u'k' # 0x006b -> LATIN SMALL LETTER K u'l' # 0x006c -> LATIN SMALL LETTER L u'm' # 0x006d -> LATIN SMALL LETTER M u'n' # 0x006e -> LATIN SMALL LETTER N u'o' # 0x006f -> LATIN SMALL LETTER O u'p' # 0x0070 -> LATIN SMALL LETTER P u'q' # 0x0071 -> LATIN SMALL LETTER Q u'r' # 0x0072 -> LATIN SMALL LETTER R u's' # 0x0073 -> LATIN SMALL LETTER S u't' # 0x0074 -> LATIN SMALL LETTER T u'u' # 0x0075 -> LATIN SMALL LETTER U u'v' # 0x0076 -> LATIN SMALL LETTER V u'w' # 0x0077 -> LATIN SMALL LETTER W u'x' # 0x0078 -> LATIN SMALL LETTER X u'y' # 0x0079 -> LATIN SMALL LETTER Y u'z' # 0x007a -> LATIN SMALL LETTER Z u'{' # 0x007b -> LEFT CURLY BRACKET u'|' # 0x007c -> VERTICAL LINE u'}' # 0x007d -> RIGHT CURLY BRACKET u'~' # 0x007e -> TILDE u'\x7f' # 0x007f -> DELETE u'\xb0' # 0x0080 -> DEGREE SIGN u'\xb7' # 0x0081 -> MIDDLE DOT u'\u2219' # 0x0082 -> BULLET OPERATOR u'\u221a' # 0x0083 -> SQUARE ROOT u'\u2592' # 0x0084 -> MEDIUM SHADE u'\u2500' # 0x0085 -> FORMS LIGHT HORIZONTAL u'\u2502' # 0x0086 -> FORMS LIGHT VERTICAL u'\u253c' # 0x0087 -> FORMS LIGHT VERTICAL AND HORIZONTAL u'\u2524' # 0x0088 -> FORMS LIGHT VERTICAL AND LEFT u'\u252c' # 0x0089 -> FORMS LIGHT DOWN AND HORIZONTAL u'\u251c' # 0x008a -> FORMS LIGHT VERTICAL AND RIGHT u'\u2534' # 0x008b -> FORMS LIGHT UP AND HORIZONTAL u'\u2510' # 0x008c -> FORMS LIGHT DOWN AND LEFT u'\u250c' # 0x008d -> FORMS LIGHT DOWN AND RIGHT u'\u2514' # 0x008e -> FORMS LIGHT UP AND RIGHT u'\u2518' # 0x008f -> FORMS LIGHT UP AND LEFT u'\u03b2' # 0x0090 -> GREEK SMALL BETA u'\u221e' # 0x0091 -> INFINITY u'\u03c6' # 0x0092 -> GREEK SMALL PHI u'\xb1' # 0x0093 -> PLUS-OR-MINUS SIGN u'\xbd' # 0x0094 -> FRACTION 1/2 u'\xbc' # 0x0095 -> FRACTION 1/4 u'\u2248' # 0x0096 -> ALMOST EQUAL TO u'\xab' # 0x0097 -> LEFT POINTING GUILLEMET u'\xbb' # 0x0098 -> RIGHT POINTING GUILLEMET u'\ufef7' # 0x0099 -> ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM u'\ufef8' # 0x009a -> ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM u'\ufffe' # 0x009b -> UNDEFINED u'\ufffe' # 0x009c -> UNDEFINED u'\ufefb' # 0x009d -> ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM u'\ufefc' # 0x009e -> ARABIC LIGATURE LAM WITH ALEF FINAL FORM u'\ufffe' # 0x009f -> UNDEFINED u'\xa0' # 0x00a0 -> NON-BREAKING SPACE u'\xad' # 0x00a1 -> SOFT HYPHEN u'\ufe82' # 0x00a2 -> ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM u'\xa3' # 0x00a3 -> POUND SIGN u'\xa4' # 0x00a4 -> CURRENCY SIGN u'\ufe84' # 0x00a5 -> ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM u'\ufffe' # 0x00a6 -> UNDEFINED u'\ufffe' # 0x00a7 -> UNDEFINED u'\ufe8e' # 0x00a8 -> ARABIC LETTER ALEF FINAL FORM u'\ufe8f' # 0x00a9 -> ARABIC LETTER BEH ISOLATED FORM u'\ufe95' # 0x00aa -> ARABIC LETTER TEH ISOLATED FORM u'\ufe99' # 0x00ab -> ARABIC LETTER THEH ISOLATED FORM u'\u060c' # 0x00ac -> ARABIC COMMA u'\ufe9d' # 0x00ad -> ARABIC LETTER JEEM ISOLATED FORM u'\ufea1' # 0x00ae -> ARABIC LETTER HAH ISOLATED FORM u'\ufea5' # 0x00af -> ARABIC LETTER KHAH ISOLATED FORM u'\u0660' # 0x00b0 -> ARABIC-INDIC DIGIT ZERO u'\u0661' # 0x00b1 -> ARABIC-INDIC DIGIT ONE u'\u0662' # 0x00b2 -> ARABIC-INDIC DIGIT TWO u'\u0663' # 0x00b3 -> ARABIC-INDIC DIGIT THREE u'\u0664' # 0x00b4 -> ARABIC-INDIC DIGIT FOUR u'\u0665' # 0x00b5 -> ARABIC-INDIC DIGIT FIVE u'\u0666' # 0x00b6 -> ARABIC-INDIC DIGIT SIX u'\u0667' # 0x00b7 -> ARABIC-INDIC DIGIT SEVEN u'\u0668' # 0x00b8 -> ARABIC-INDIC DIGIT EIGHT u'\u0669' # 0x00b9 -> ARABIC-INDIC DIGIT NINE u'\ufed1' # 0x00ba -> ARABIC LETTER FEH ISOLATED FORM u'\u061b' # 0x00bb -> ARABIC SEMICOLON u'\ufeb1' # 0x00bc -> ARABIC LETTER SEEN ISOLATED FORM u'\ufeb5' # 0x00bd -> ARABIC LETTER SHEEN ISOLATED FORM u'\ufeb9' # 0x00be -> ARABIC LETTER SAD ISOLATED FORM u'\u061f' # 0x00bf -> ARABIC QUESTION MARK u'\xa2' # 0x00c0 -> CENT SIGN u'\ufe80' # 0x00c1 -> ARABIC LETTER HAMZA ISOLATED FORM u'\ufe81' # 0x00c2 -> ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM u'\ufe83' # 0x00c3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM u'\ufe85' # 0x00c4 -> ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM u'\ufeca' # 0x00c5 -> ARABIC LETTER AIN FINAL FORM u'\ufe8b' # 0x00c6 -> ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM u'\ufe8d' # 0x00c7 -> ARABIC LETTER ALEF ISOLATED FORM u'\ufe91' # 0x00c8 -> ARABIC LETTER BEH INITIAL FORM u'\ufe93' # 0x00c9 -> ARABIC LETTER TEH MARBUTA ISOLATED FORM u'\ufe97' # 0x00ca -> ARABIC LETTER TEH INITIAL FORM u'\ufe9b' # 0x00cb -> ARABIC LETTER THEH INITIAL FORM u'\ufe9f' # 0x00cc -> ARABIC LETTER JEEM INITIAL FORM u'\ufea3' # 0x00cd -> ARABIC LETTER HAH INITIAL FORM u'\ufea7' # 0x00ce -> ARABIC LETTER KHAH INITIAL FORM u'\ufea9' # 0x00cf -> ARABIC LETTER DAL ISOLATED FORM u'\ufeab' # 0x00d0 -> ARABIC LETTER THAL ISOLATED FORM u'\ufead' # 0x00d1 -> ARABIC LETTER REH ISOLATED FORM u'\ufeaf' # 0x00d2 -> ARABIC LETTER ZAIN ISOLATED FORM u'\ufeb3' # 0x00d3 -> ARABIC LETTER SEEN INITIAL FORM u'\ufeb7' # 0x00d4 -> ARABIC LETTER SHEEN INITIAL FORM u'\ufebb' # 0x00d5 -> ARABIC LETTER SAD INITIAL FORM u'\ufebf' # 0x00d6 -> ARABIC LETTER DAD INITIAL FORM u'\ufec1' # 0x00d7 -> ARABIC LETTER TAH ISOLATED FORM u'\ufec5' # 0x00d8 -> ARABIC LETTER ZAH ISOLATED FORM u'\ufecb' # 0x00d9 -> ARABIC LETTER AIN INITIAL FORM u'\ufecf' # 0x00da -> ARABIC LETTER GHAIN INITIAL FORM u'\xa6' # 0x00db -> BROKEN VERTICAL BAR u'\xac' # 0x00dc -> NOT SIGN u'\xf7' # 0x00dd -> DIVISION SIGN u'\xd7' # 0x00de -> MULTIPLICATION SIGN u'\ufec9' # 0x00df -> ARABIC LETTER AIN ISOLATED FORM u'\u0640' # 0x00e0 -> ARABIC TATWEEL u'\ufed3' # 0x00e1 -> ARABIC LETTER FEH INITIAL FORM u'\ufed7' # 0x00e2 -> ARABIC LETTER QAF INITIAL FORM u'\ufedb' # 0x00e3 -> ARABIC LETTER KAF INITIAL FORM u'\ufedf' # 0x00e4 -> ARABIC LETTER LAM INITIAL FORM u'\ufee3' # 0x00e5 -> ARABIC LETTER MEEM INITIAL FORM u'\ufee7' # 0x00e6 -> ARABIC LETTER NOON INITIAL FORM u'\ufeeb' # 0x00e7 -> ARABIC LETTER HEH INITIAL FORM u'\ufeed' # 0x00e8 -> ARABIC LETTER WAW ISOLATED FORM u'\ufeef' # 0x00e9 -> ARABIC LETTER ALEF MAKSURA ISOLATED FORM u'\ufef3' # 0x00ea -> ARABIC LETTER YEH INITIAL FORM u'\ufebd' # 0x00eb -> ARABIC LETTER DAD ISOLATED FORM u'\ufecc' # 0x00ec -> ARABIC LETTER AIN MEDIAL FORM u'\ufece' # 0x00ed -> ARABIC LETTER GHAIN FINAL FORM u'\ufecd' # 0x00ee -> ARABIC LETTER GHAIN ISOLATED FORM u'\ufee1' # 0x00ef -> ARABIC LETTER MEEM ISOLATED FORM u'\ufe7d' # 0x00f0 -> ARABIC SHADDA MEDIAL FORM u'\u0651' # 0x00f1 -> ARABIC SHADDAH u'\ufee5' # 0x00f2 -> ARABIC LETTER NOON ISOLATED FORM u'\ufee9' # 0x00f3 -> ARABIC LETTER HEH ISOLATED FORM u'\ufeec' # 0x00f4 -> ARABIC LETTER HEH MEDIAL FORM u'\ufef0' # 0x00f5 -> ARABIC LETTER ALEF MAKSURA FINAL FORM u'\ufef2' # 0x00f6 -> ARABIC LETTER YEH FINAL FORM u'\ufed0' # 0x00f7 -> ARABIC LETTER GHAIN MEDIAL FORM u'\ufed5' # 0x00f8 -> ARABIC LETTER QAF ISOLATED FORM u'\ufef5' # 0x00f9 -> ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM u'\ufef6' # 0x00fa -> ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM u'\ufedd' # 0x00fb -> ARABIC LETTER LAM ISOLATED FORM u'\ufed9' # 0x00fc -> ARABIC LETTER KAF ISOLATED FORM u'\ufef1' # 0x00fd -> ARABIC LETTER YEH ISOLATED FORM u'\u25a0' # 0x00fe -> BLACK SQUARE u'\ufffe' # 0x00ff -> UNDEFINED ) ### Encoding Map encoding_map = { 0x0000: 0x0000, # NULL 0x0001: 0x0001, # START OF HEADING 0x0002: 0x0002, # START OF TEXT 0x0003: 0x0003, # END OF TEXT 0x0004: 0x0004, # END OF TRANSMISSION 0x0005: 0x0005, # ENQUIRY 0x0006: 0x0006, # ACKNOWLEDGE 0x0007: 0x0007, # BELL 0x0008: 0x0008, # BACKSPACE 0x0009: 0x0009, # HORIZONTAL TABULATION 0x000a: 0x000a, # LINE FEED 0x000b: 0x000b, # VERTICAL TABULATION 0x000c: 0x000c, # FORM FEED 0x000d: 0x000d, # CARRIAGE RETURN 0x000e: 0x000e, # SHIFT OUT 0x000f: 0x000f, # SHIFT IN 0x0010: 0x0010, # DATA LINK ESCAPE 0x0011: 0x0011, # DEVICE CONTROL ONE 0x0012: 0x0012, # DEVICE CONTROL TWO 0x0013: 0x0013, # DEVICE CONTROL THREE 0x0014: 0x0014, # DEVICE CONTROL FOUR 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE 0x0016: 0x0016, # SYNCHRONOUS IDLE 0x0017: 0x0017, # END OF TRANSMISSION BLOCK 0x0018: 0x0018, # CANCEL 0x0019: 0x0019, # END OF MEDIUM 0x001a: 0x001a, # SUBSTITUTE 0x001b: 0x001b, # ESCAPE 0x001c: 0x001c, # FILE SEPARATOR 0x001d: 0x001d, # GROUP SEPARATOR 0x001e: 0x001e, # RECORD SEPARATOR 0x001f: 0x001f, # UNIT SEPARATOR 0x0020: 0x0020, # SPACE 0x0021: 0x0021, # EXCLAMATION MARK 0x0022: 0x0022, # QUOTATION MARK 0x0023: 0x0023, # NUMBER SIGN 0x0024: 0x0024, # DOLLAR SIGN 0x0026: 0x0026, # AMPERSAND 0x0027: 0x0027, # APOSTROPHE 0x0028: 0x0028, # LEFT PARENTHESIS 0x0029: 0x0029, # RIGHT PARENTHESIS 0x002a: 0x002a, # ASTERISK 0x002b: 0x002b, # PLUS SIGN 0x002c: 0x002c, # COMMA 0x002d: 0x002d, # HYPHEN-MINUS 0x002e: 0x002e, # FULL STOP 0x002f: 0x002f, # SOLIDUS 0x0030: 0x0030, # DIGIT ZERO 0x0031: 0x0031, # DIGIT ONE 0x0032: 0x0032, # DIGIT TWO 0x0033: 0x0033, # DIGIT THREE 0x0034: 0x0034, # DIGIT FOUR 0x0035: 0x0035, # DIGIT FIVE 0x0036: 0x0036, # DIGIT SIX 0x0037: 0x0037, # DIGIT SEVEN 0x0038: 0x0038, # DIGIT EIGHT 0x0039: 0x0039, # DIGIT NINE 0x003a: 0x003a, # COLON 0x003b: 0x003b, # SEMICOLON 0x003c: 0x003c, # LESS-THAN SIGN 0x003d: 0x003d, # EQUALS SIGN 0x003e: 0x003e, # GREATER-THAN SIGN 0x003f: 0x003f, # QUESTION MARK 0x0040: 0x0040, # COMMERCIAL AT 0x0041: 0x0041, # LATIN CAPITAL LETTER A 0x0042: 0x0042, # LATIN CAPITAL LETTER B 0x0043: 0x0043, # LATIN CAPITAL LETTER C 0x0044: 0x0044, # LATIN CAPITAL LETTER D 0x0045: 0x0045, # LATIN CAPITAL LETTER E 0x0046: 0x0046, # LATIN CAPITAL LETTER F 0x0047: 0x0047, # LATIN CAPITAL LETTER G 0x0048: 0x0048, # LATIN CAPITAL LETTER H 0x0049: 0x0049, # LATIN CAPITAL LETTER I 0x004a: 0x004a, # LATIN CAPITAL LETTER J 0x004b: 0x004b, # LATIN CAPITAL LETTER K 0x004c: 0x004c, # LATIN CAPITAL LETTER L 0x004d: 0x004d, # LATIN CAPITAL LETTER M 0x004e: 0x004e, # LATIN CAPITAL LETTER N 0x004f: 0x004f, # LATIN CAPITAL LETTER O 0x0050: 0x0050, # LATIN CAPITAL LETTER P 0x0051: 0x0051, # LATIN CAPITAL LETTER Q 0x0052: 0x0052, # LATIN CAPITAL LETTER R 0x0053: 0x0053, # LATIN CAPITAL LETTER S 0x0054: 0x0054, # LATIN CAPITAL LETTER T 0x0055: 0x0055, # LATIN CAPITAL LETTER U 0x0056: 0x0056, # LATIN CAPITAL LETTER V 0x0057: 0x0057, # LATIN CAPITAL LETTER W 0x0058: 0x0058, # LATIN CAPITAL LETTER X 0x0059: 0x0059, # LATIN CAPITAL LETTER Y 0x005a: 0x005a, # LATIN CAPITAL LETTER Z 0x005b: 0x005b, # LEFT SQUARE BRACKET 0x005c: 0x005c, # REVERSE SOLIDUS 0x005d: 0x005d, # RIGHT SQUARE BRACKET 0x005e: 0x005e, # CIRCUMFLEX ACCENT 0x005f: 0x005f, # LOW LINE 0x0060: 0x0060, # GRAVE ACCENT 0x0061: 0x0061, # LATIN SMALL LETTER A 0x0062: 0x0062, # LATIN SMALL LETTER B 0x0063: 0x0063, # LATIN SMALL LETTER C 0x0064: 0x0064, # LATIN SMALL LETTER D 0x0065: 0x0065, # LATIN SMALL LETTER E 0x0066: 0x0066, # LATIN SMALL LETTER F 0x0067: 0x0067, # LATIN SMALL LETTER G 0x0068: 0x0068, # LATIN SMALL LETTER H 0x0069: 0x0069, # LATIN SMALL LETTER I 0x006a: 0x006a, # LATIN SMALL LETTER J 0x006b: 0x006b, # LATIN SMALL LETTER K 0x006c: 0x006c, # LATIN SMALL LETTER L 0x006d: 0x006d, # LATIN SMALL LETTER M 0x006e: 0x006e, # LATIN SMALL LETTER N 0x006f: 0x006f, # LATIN SMALL LETTER O 0x0070: 0x0070, # LATIN SMALL LETTER P 0x0071: 0x0071, # LATIN SMALL LETTER Q 0x0072: 0x0072, # LATIN SMALL LETTER R 0x0073: 0x0073, # LATIN SMALL LETTER S 0x0074: 0x0074, # LATIN SMALL LETTER T 0x0075: 0x0075, # LATIN SMALL LETTER U 0x0076: 0x0076, # LATIN SMALL LETTER V 0x0077: 0x0077, # LATIN SMALL LETTER W 0x0078: 0x0078, # LATIN SMALL LETTER X 0x0079: 0x0079, # LATIN SMALL LETTER Y 0x007a: 0x007a, # LATIN SMALL LETTER Z 0x007b: 0x007b, # LEFT CURLY BRACKET 0x007c: 0x007c, # VERTICAL LINE 0x007d: 0x007d, # RIGHT CURLY BRACKET 0x007e: 0x007e, # TILDE 0x007f: 0x007f, # DELETE 0x00a0: 0x00a0, # NON-BREAKING SPACE 0x00a2: 0x00c0, # CENT SIGN 0x00a3: 0x00a3, # POUND SIGN 0x00a4: 0x00a4, # CURRENCY SIGN 0x00a6: 0x00db, # BROKEN VERTICAL BAR 0x00ab: 0x0097, # LEFT POINTING GUILLEMET 0x00ac: 0x00dc, # NOT SIGN 0x00ad: 0x00a1, # SOFT HYPHEN 0x00b0: 0x0080, # DEGREE SIGN 0x00b1: 0x0093, # PLUS-OR-MINUS SIGN 0x00b7: 0x0081, # MIDDLE DOT 0x00bb: 0x0098, # RIGHT POINTING GUILLEMET 0x00bc: 0x0095, # FRACTION 1/4 0x00bd: 0x0094, # FRACTION 1/2 0x00d7: 0x00de, # MULTIPLICATION SIGN 0x00f7: 0x00dd, # DIVISION SIGN 0x03b2: 0x0090, # GREEK SMALL BETA 0x03c6: 0x0092, # GREEK SMALL PHI 0x060c: 0x00ac, # ARABIC COMMA 0x061b: 0x00bb, # ARABIC SEMICOLON 0x061f: 0x00bf, # ARABIC QUESTION MARK 0x0640: 0x00e0, # ARABIC TATWEEL 0x0651: 0x00f1, # ARABIC SHADDAH 0x0660: 0x00b0, # ARABIC-INDIC DIGIT ZERO 0x0661: 0x00b1, # ARABIC-INDIC DIGIT ONE 0x0662: 0x00b2, # ARABIC-INDIC DIGIT TWO 0x0663: 0x00b3, # ARABIC-INDIC DIGIT THREE 0x0664: 0x00b4, # ARABIC-INDIC DIGIT FOUR 0x0665: 0x00b5, # ARABIC-INDIC DIGIT FIVE 0x0666: 0x00b6, # ARABIC-INDIC DIGIT SIX 0x0667: 0x00b7, # ARABIC-INDIC DIGIT SEVEN 0x0668: 0x00b8, # ARABIC-INDIC DIGIT EIGHT 0x0669: 0x00b9, # ARABIC-INDIC DIGIT NINE 0x066a: 0x0025, # ARABIC PERCENT SIGN 0x2219: 0x0082, # BULLET OPERATOR 0x221a: 0x0083, # SQUARE ROOT 0x221e: 0x0091, # INFINITY 0x2248: 0x0096, # ALMOST EQUAL TO 0x2500: 0x0085, # FORMS LIGHT HORIZONTAL 0x2502: 0x0086, # FORMS LIGHT VERTICAL 0x250c: 0x008d, # FORMS LIGHT DOWN AND RIGHT 0x2510: 0x008c, # FORMS LIGHT DOWN AND LEFT 0x2514: 0x008e, # FORMS LIGHT UP AND RIGHT 0x2518: 0x008f, # FORMS LIGHT UP AND LEFT 0x251c: 0x008a, # FORMS LIGHT VERTICAL AND RIGHT 0x2524: 0x0088, # FORMS LIGHT VERTICAL AND LEFT 0x252c: 0x0089, # FORMS LIGHT DOWN AND HORIZONTAL 0x2534: 0x008b, # FORMS LIGHT UP AND HORIZONTAL 0x253c: 0x0087, # FORMS LIGHT VERTICAL AND HORIZONTAL 0x2592: 0x0084, # MEDIUM SHADE 0x25a0: 0x00fe, # BLACK SQUARE 0xfe7d: 0x00f0, # ARABIC SHADDA MEDIAL FORM 0xfe80: 0x00c1, # ARABIC LETTER HAMZA ISOLATED FORM 0xfe81: 0x00c2, # ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM 0xfe82: 0x00a2, # ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM 0xfe83: 0x00c3, # ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM 0xfe84: 0x00a5, # ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM 0xfe85: 0x00c4, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM 0xfe8b: 0x00c6, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM 0xfe8d: 0x00c7, # ARABIC LETTER ALEF ISOLATED FORM 0xfe8e: 0x00a8, # ARABIC LETTER ALEF FINAL FORM 0xfe8f: 0x00a9, # ARABIC LETTER BEH ISOLATED FORM 0xfe91: 0x00c8, # ARABIC LETTER BEH INITIAL FORM 0xfe93: 0x00c9, # ARABIC LETTER TEH MARBUTA ISOLATED FORM 0xfe95: 0x00aa, # ARABIC LETTER TEH ISOLATED FORM 0xfe97: 0x00ca, # ARABIC LETTER TEH INITIAL FORM 0xfe99: 0x00ab, # ARABIC LETTER THEH ISOLATED FORM 0xfe9b: 0x00cb, # ARABIC LETTER THEH INITIAL FORM 0xfe9d: 0x00ad, # ARABIC LETTER JEEM ISOLATED FORM 0xfe9f: 0x00cc, # ARABIC LETTER JEEM INITIAL FORM 0xfea1: 0x00ae, # ARABIC LETTER HAH ISOLATED FORM 0xfea3: 0x00cd, # ARABIC LETTER HAH INITIAL FORM 0xfea5: 0x00af, # ARABIC LETTER KHAH ISOLATED FORM 0xfea7: 0x00ce, # ARABIC LETTER KHAH INITIAL FORM 0xfea9: 0x00cf, # ARABIC LETTER DAL ISOLATED FORM 0xfeab: 0x00d0, # ARABIC LETTER THAL ISOLATED FORM 0xfead: 0x00d1, # ARABIC LETTER REH ISOLATED FORM 0xfeaf: 0x00d2, # ARABIC LETTER ZAIN ISOLATED FORM 0xfeb1: 0x00bc, # ARABIC LETTER SEEN ISOLATED FORM 0xfeb3: 0x00d3, # ARABIC LETTER SEEN INITIAL FORM 0xfeb5: 0x00bd, # ARABIC LETTER SHEEN ISOLATED FORM 0xfeb7: 0x00d4, # ARABIC LETTER SHEEN INITIAL FORM 0xfeb9: 0x00be, # ARABIC LETTER SAD ISOLATED FORM 0xfebb: 0x00d5, # ARABIC LETTER SAD INITIAL FORM 0xfebd: 0x00eb, # ARABIC LETTER DAD ISOLATED FORM 0xfebf: 0x00d6, # ARABIC LETTER DAD INITIAL FORM 0xfec1: 0x00d7, # ARABIC LETTER TAH ISOLATED FORM 0xfec5: 0x00d8, # ARABIC LETTER ZAH ISOLATED FORM 0xfec9: 0x00df, # ARABIC LETTER AIN ISOLATED FORM 0xfeca: 0x00c5, # ARABIC LETTER AIN FINAL FORM 0xfecb: 0x00d9, # ARABIC LETTER AIN INITIAL FORM 0xfecc: 0x00ec, # ARABIC LETTER AIN MEDIAL FORM 0xfecd: 0x00ee, # ARABIC LETTER GHAIN ISOLATED FORM 0xfece: 0x00ed, # ARABIC LETTER GHAIN FINAL FORM 0xfecf: 0x00da, # ARABIC LETTER GHAIN INITIAL FORM 0xfed0: 0x00f7, # ARABIC LETTER GHAIN MEDIAL FORM 0xfed1: 0x00ba, # ARABIC LETTER FEH ISOLATED FORM 0xfed3: 0x00e1, # ARABIC LETTER FEH INITIAL FORM 0xfed5: 0x00f8, # ARABIC LETTER QAF ISOLATED FORM 0xfed7: 0x00e2, # ARABIC LETTER QAF INITIAL FORM 0xfed9: 0x00fc, # ARABIC LETTER KAF ISOLATED FORM 0xfedb: 0x00e3, # ARABIC LETTER KAF INITIAL FORM 0xfedd: 0x00fb, # ARABIC LETTER LAM ISOLATED FORM 0xfedf: 0x00e4, # ARABIC LETTER LAM INITIAL FORM 0xfee1: 0x00ef, # ARABIC LETTER MEEM ISOLATED FORM 0xfee3: 0x00e5, # ARABIC LETTER MEEM INITIAL FORM 0xfee5: 0x00f2, # ARABIC LETTER NOON ISOLATED FORM 0xfee7: 0x00e6, # ARABIC LETTER NOON INITIAL FORM 0xfee9: 0x00f3, # ARABIC LETTER HEH ISOLATED FORM 0xfeeb: 0x00e7, # ARABIC LETTER HEH INITIAL FORM 0xfeec: 0x00f4, # ARABIC LETTER HEH MEDIAL FORM 0xfeed: 0x00e8, # ARABIC LETTER WAW ISOLATED FORM 0xfeef: 0x00e9, # ARABIC LETTER ALEF MAKSURA ISOLATED FORM 0xfef0: 0x00f5, # ARABIC LETTER ALEF MAKSURA FINAL FORM 0xfef1: 0x00fd, # ARABIC LETTER YEH ISOLATED FORM 0xfef2: 0x00f6, # ARABIC LETTER YEH FINAL FORM 0xfef3: 0x00ea, # ARABIC LETTER YEH INITIAL FORM 0xfef5: 0x00f9, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM 0xfef6: 0x00fa, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM 0xfef7: 0x0099, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM 0xfef8: 0x009a, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM 0xfefb: 0x009d, # ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM 0xfefc: 0x009e, # ARABIC LIGATURE LAM WITH ALEF FINAL FORM } gb2312.py 0000644 00000002003 15053241622 0006011 0 ustar 00 # # gb2312.py: Python Unicode Codec for GB2312 # # Written by Hye-Shik Chang <perky@FreeBSD.org> # import _codecs_cn, codecs import _multibytecodec as mbc codec = _codecs_cn.getcodec('gb2312') class Codec(codecs.Codec): encode = codec.encode decode = codec.decode class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, codecs.IncrementalEncoder): codec = codec class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, codecs.IncrementalDecoder): codec = codec class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): codec = codec class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): codec = codec def getregentry(): return codecs.CodecInfo( name='gb2312', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) ascii.py 0000644 00000002340 15053241622 0006205 0 ustar 00 """ Python 'ascii' Codec Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. """ import codecs ### Codec APIs class Codec(codecs.Codec): # Note: Binding these as C functions will result in the class not # converting them to methods. This is intended. encode = codecs.ascii_encode decode = codecs.ascii_decode class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.ascii_encode(input, self.errors)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.ascii_decode(input, self.errors)[0] class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass class StreamConverter(StreamWriter,StreamReader): encode = codecs.ascii_decode decode = codecs.ascii_encode ### encodings module API def getregentry(): return codecs.CodecInfo( name='ascii', encode=Codec.encode, decode=Codec.decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamwriter=StreamWriter, streamreader=StreamReader, ) iso8859_9.py 0000644 00000032144 15053241622 0006502 0 ustar 00 """ Python Character Mapping Codec iso8859_9 generated from 'MAPPINGS/ISO8859/8859-9.TXT' with gencodec.py. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.charmap_decode(input,self.errors,decoding_table)[0] class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='iso8859-9', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) ### Decoding Table decoding_table = ( u'\x00' # 0x00 -> NULL u'\x01' # 0x01 -> START OF HEADING u'\x02' # 0x02 -> START OF TEXT u'\x03' # 0x03 -> END OF TEXT u'\x04' # 0x04 -> END OF TRANSMISSION u'\x05' # 0x05 -> ENQUIRY u'\x06' # 0x06 -> ACKNOWLEDGE u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION u'\n' # 0x0A -> LINE FEED u'\x0b' # 0x0B -> VERTICAL TABULATION u'\x0c' # 0x0C -> FORM FEED u'\r' # 0x0D -> CARRIAGE RETURN u'\x0e' # 0x0E -> SHIFT OUT u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO u'\x13' # 0x13 -> DEVICE CONTROL THREE u'\x14' # 0x14 -> DEVICE CONTROL FOUR u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE u'\x16' # 0x16 -> SYNCHRONOUS IDLE u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM u'\x1a' # 0x1A -> SUBSTITUTE u'\x1b' # 0x1B -> ESCAPE u'\x1c' # 0x1C -> FILE SEPARATOR u'\x1d' # 0x1D -> GROUP SEPARATOR u'\x1e' # 0x1E -> RECORD SEPARATOR u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK u'#' # 0x23 -> NUMBER SIGN u'$' # 0x24 -> DOLLAR SIGN u'%' # 0x25 -> PERCENT SIGN u'&' # 0x26 -> AMPERSAND u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS u'*' # 0x2A -> ASTERISK u'+' # 0x2B -> PLUS SIGN u',' # 0x2C -> COMMA u'-' # 0x2D -> HYPHEN-MINUS u'.' # 0x2E -> FULL STOP u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO u'3' # 0x33 -> DIGIT THREE u'4' # 0x34 -> DIGIT FOUR u'5' # 0x35 -> DIGIT FIVE u'6' # 0x36 -> DIGIT SIX u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE u':' # 0x3A -> COLON u';' # 0x3B -> SEMICOLON u'<' # 0x3C -> LESS-THAN SIGN u'=' # 0x3D -> EQUALS SIGN u'>' # 0x3E -> GREATER-THAN SIGN u'?' # 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B u'C' # 0x43 -> LATIN CAPITAL LETTER C u'D' # 0x44 -> LATIN CAPITAL LETTER D u'E' # 0x45 -> LATIN CAPITAL LETTER E u'F' # 0x46 -> LATIN CAPITAL LETTER F u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I u'J' # 0x4A -> LATIN CAPITAL LETTER J u'K' # 0x4B -> LATIN CAPITAL LETTER K u'L' # 0x4C -> LATIN CAPITAL LETTER L u'M' # 0x4D -> LATIN CAPITAL LETTER M u'N' # 0x4E -> LATIN CAPITAL LETTER N u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R u'S' # 0x53 -> LATIN CAPITAL LETTER S u'T' # 0x54 -> LATIN CAPITAL LETTER T u'U' # 0x55 -> LATIN CAPITAL LETTER U u'V' # 0x56 -> LATIN CAPITAL LETTER V u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y u'Z' # 0x5A -> LATIN CAPITAL LETTER Z u'[' # 0x5B -> LEFT SQUARE BRACKET u'\\' # 0x5C -> REVERSE SOLIDUS u']' # 0x5D -> RIGHT SQUARE BRACKET u'^' # 0x5E -> CIRCUMFLEX ACCENT u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B u'c' # 0x63 -> LATIN SMALL LETTER C u'd' # 0x64 -> LATIN SMALL LETTER D u'e' # 0x65 -> LATIN SMALL LETTER E u'f' # 0x66 -> LATIN SMALL LETTER F u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I u'j' # 0x6A -> LATIN SMALL LETTER J u'k' # 0x6B -> LATIN SMALL LETTER K u'l' # 0x6C -> LATIN SMALL LETTER L u'm' # 0x6D -> LATIN SMALL LETTER M u'n' # 0x6E -> LATIN SMALL LETTER N u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R u's' # 0x73 -> LATIN SMALL LETTER S u't' # 0x74 -> LATIN SMALL LETTER T u'u' # 0x75 -> LATIN SMALL LETTER U u'v' # 0x76 -> LATIN SMALL LETTER V u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y u'z' # 0x7A -> LATIN SMALL LETTER Z u'{' # 0x7B -> LEFT CURLY BRACKET u'|' # 0x7C -> VERTICAL LINE u'}' # 0x7D -> RIGHT CURLY BRACKET u'~' # 0x7E -> TILDE u'\x7f' # 0x7F -> DELETE u'\x80' # 0x80 -> <control> u'\x81' # 0x81 -> <control> u'\x82' # 0x82 -> <control> u'\x83' # 0x83 -> <control> u'\x84' # 0x84 -> <control> u'\x85' # 0x85 -> <control> u'\x86' # 0x86 -> <control> u'\x87' # 0x87 -> <control> u'\x88' # 0x88 -> <control> u'\x89' # 0x89 -> <control> u'\x8a' # 0x8A -> <control> u'\x8b' # 0x8B -> <control> u'\x8c' # 0x8C -> <control> u'\x8d' # 0x8D -> <control> u'\x8e' # 0x8E -> <control> u'\x8f' # 0x8F -> <control> u'\x90' # 0x90 -> <control> u'\x91' # 0x91 -> <control> u'\x92' # 0x92 -> <control> u'\x93' # 0x93 -> <control> u'\x94' # 0x94 -> <control> u'\x95' # 0x95 -> <control> u'\x96' # 0x96 -> <control> u'\x97' # 0x97 -> <control> u'\x98' # 0x98 -> <control> u'\x99' # 0x99 -> <control> u'\x9a' # 0x9A -> <control> u'\x9b' # 0x9B -> <control> u'\x9c' # 0x9C -> <control> u'\x9d' # 0x9D -> <control> u'\x9e' # 0x9E -> <control> u'\x9f' # 0x9F -> <control> u'\xa0' # 0xA0 -> NO-BREAK SPACE u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK u'\xa2' # 0xA2 -> CENT SIGN u'\xa3' # 0xA3 -> POUND SIGN u'\xa4' # 0xA4 -> CURRENCY SIGN u'\xa5' # 0xA5 -> YEN SIGN u'\xa6' # 0xA6 -> BROKEN BAR u'\xa7' # 0xA7 -> SECTION SIGN u'\xa8' # 0xA8 -> DIAERESIS u'\xa9' # 0xA9 -> COPYRIGHT SIGN u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK u'\xac' # 0xAC -> NOT SIGN u'\xad' # 0xAD -> SOFT HYPHEN u'\xae' # 0xAE -> REGISTERED SIGN u'\xaf' # 0xAF -> MACRON u'\xb0' # 0xB0 -> DEGREE SIGN u'\xb1' # 0xB1 -> PLUS-MINUS SIGN u'\xb2' # 0xB2 -> SUPERSCRIPT TWO u'\xb3' # 0xB3 -> SUPERSCRIPT THREE u'\xb4' # 0xB4 -> ACUTE ACCENT u'\xb5' # 0xB5 -> MICRO SIGN u'\xb6' # 0xB6 -> PILCROW SIGN u'\xb7' # 0xB7 -> MIDDLE DOT u'\xb8' # 0xB8 -> CEDILLA u'\xb9' # 0xB9 -> SUPERSCRIPT ONE u'\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS u'\xbf' # 0xBF -> INVERTED QUESTION MARK u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS u'\u011e' # 0xD0 -> LATIN CAPITAL LETTER G WITH BREVE u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS u'\xd7' # 0xD7 -> MULTIPLICATION SIGN u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS u'\u0130' # 0xDD -> LATIN CAPITAL LETTER I WITH DOT ABOVE u'\u015e' # 0xDE -> LATIN CAPITAL LETTER S WITH CEDILLA u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS u'\u011f' # 0xF0 -> LATIN SMALL LETTER G WITH BREVE u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS u'\xf7' # 0xF7 -> DIVISION SIGN u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS u'\u0131' # 0xFD -> LATIN SMALL LETTER DOTLESS I u'\u015f' # 0xFE -> LATIN SMALL LETTER S WITH CEDILLA u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS ) ### Encoding table encoding_table=codecs.charmap_build(decoding_table) iso2022_jp_ext.py 0000644 00000002055 15053241622 0007571 0 ustar 00 # # iso2022_jp_ext.py: Python Unicode Codec for ISO2022_JP_EXT # # Written by Hye-Shik Chang <perky@FreeBSD.org> # import _codecs_iso2022, codecs import _multibytecodec as mbc codec = _codecs_iso2022.getcodec('iso2022_jp_ext') class Codec(codecs.Codec): encode = codec.encode decode = codec.decode class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, codecs.IncrementalEncoder): codec = codec class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, codecs.IncrementalDecoder): codec = codec class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): codec = codec class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): codec = codec def getregentry(): return codecs.CodecInfo( name='iso2022_jp_ext', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) cp860.py 0000644 00000104171 15053241622 0005762 0 ustar 00 """ Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP860.TXT' with gencodec.py. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.charmap_encode(input,self.errors,encoding_map)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.charmap_decode(input,self.errors,decoding_table)[0] class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='cp860', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX 0x0084: 0x00e3, # LATIN SMALL LETTER A WITH TILDE 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE 0x0086: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX 0x0089: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE 0x008b: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE 0x008c: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE 0x008e: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE 0x008f: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE 0x0091: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE 0x0092: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX 0x0094: 0x00f5, # LATIN SMALL LETTER O WITH TILDE 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE 0x0096: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE 0x0098: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE 0x0099: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS 0x009b: 0x00a2, # CENT SIGN 0x009c: 0x00a3, # POUND SIGN 0x009d: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE 0x009e: 0x20a7, # PESETA SIGN 0x009f: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR 0x00a8: 0x00bf, # INVERTED QUESTION MARK 0x00a9: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE 0x00aa: 0x00ac, # NOT SIGN 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00b0: 0x2591, # LIGHT SHADE 0x00b1: 0x2592, # MEDIUM SHADE 0x00b2: 0x2593, # DARK SHADE 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT 0x00db: 0x2588, # FULL BLOCK 0x00dc: 0x2584, # LOWER HALF BLOCK 0x00dd: 0x258c, # LEFT HALF BLOCK 0x00de: 0x2590, # RIGHT HALF BLOCK 0x00df: 0x2580, # UPPER HALF BLOCK 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA 0x00e3: 0x03c0, # GREEK SMALL LETTER PI 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA 0x00e6: 0x00b5, # MICRO SIGN 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA 0x00ec: 0x221e, # INFINITY 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON 0x00ef: 0x2229, # INTERSECTION 0x00f0: 0x2261, # IDENTICAL TO 0x00f1: 0x00b1, # PLUS-MINUS SIGN 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO 0x00f4: 0x2320, # TOP HALF INTEGRAL 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL 0x00f6: 0x00f7, # DIVISION SIGN 0x00f7: 0x2248, # ALMOST EQUAL TO 0x00f8: 0x00b0, # DEGREE SIGN 0x00f9: 0x2219, # BULLET OPERATOR 0x00fa: 0x00b7, # MIDDLE DOT 0x00fb: 0x221a, # SQUARE ROOT 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N 0x00fd: 0x00b2, # SUPERSCRIPT TWO 0x00fe: 0x25a0, # BLACK SQUARE 0x00ff: 0x00a0, # NO-BREAK SPACE }) ### Decoding Table decoding_table = ( u'\x00' # 0x0000 -> NULL u'\x01' # 0x0001 -> START OF HEADING u'\x02' # 0x0002 -> START OF TEXT u'\x03' # 0x0003 -> END OF TEXT u'\x04' # 0x0004 -> END OF TRANSMISSION u'\x05' # 0x0005 -> ENQUIRY u'\x06' # 0x0006 -> ACKNOWLEDGE u'\x07' # 0x0007 -> BELL u'\x08' # 0x0008 -> BACKSPACE u'\t' # 0x0009 -> HORIZONTAL TABULATION u'\n' # 0x000a -> LINE FEED u'\x0b' # 0x000b -> VERTICAL TABULATION u'\x0c' # 0x000c -> FORM FEED u'\r' # 0x000d -> CARRIAGE RETURN u'\x0e' # 0x000e -> SHIFT OUT u'\x0f' # 0x000f -> SHIFT IN u'\x10' # 0x0010 -> DATA LINK ESCAPE u'\x11' # 0x0011 -> DEVICE CONTROL ONE u'\x12' # 0x0012 -> DEVICE CONTROL TWO u'\x13' # 0x0013 -> DEVICE CONTROL THREE u'\x14' # 0x0014 -> DEVICE CONTROL FOUR u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE u'\x16' # 0x0016 -> SYNCHRONOUS IDLE u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK u'\x18' # 0x0018 -> CANCEL u'\x19' # 0x0019 -> END OF MEDIUM u'\x1a' # 0x001a -> SUBSTITUTE u'\x1b' # 0x001b -> ESCAPE u'\x1c' # 0x001c -> FILE SEPARATOR u'\x1d' # 0x001d -> GROUP SEPARATOR u'\x1e' # 0x001e -> RECORD SEPARATOR u'\x1f' # 0x001f -> UNIT SEPARATOR u' ' # 0x0020 -> SPACE u'!' # 0x0021 -> EXCLAMATION MARK u'"' # 0x0022 -> QUOTATION MARK u'#' # 0x0023 -> NUMBER SIGN u'$' # 0x0024 -> DOLLAR SIGN u'%' # 0x0025 -> PERCENT SIGN u'&' # 0x0026 -> AMPERSAND u"'" # 0x0027 -> APOSTROPHE u'(' # 0x0028 -> LEFT PARENTHESIS u')' # 0x0029 -> RIGHT PARENTHESIS u'*' # 0x002a -> ASTERISK u'+' # 0x002b -> PLUS SIGN u',' # 0x002c -> COMMA u'-' # 0x002d -> HYPHEN-MINUS u'.' # 0x002e -> FULL STOP u'/' # 0x002f -> SOLIDUS u'0' # 0x0030 -> DIGIT ZERO u'1' # 0x0031 -> DIGIT ONE u'2' # 0x0032 -> DIGIT TWO u'3' # 0x0033 -> DIGIT THREE u'4' # 0x0034 -> DIGIT FOUR u'5' # 0x0035 -> DIGIT FIVE u'6' # 0x0036 -> DIGIT SIX u'7' # 0x0037 -> DIGIT SEVEN u'8' # 0x0038 -> DIGIT EIGHT u'9' # 0x0039 -> DIGIT NINE u':' # 0x003a -> COLON u';' # 0x003b -> SEMICOLON u'<' # 0x003c -> LESS-THAN SIGN u'=' # 0x003d -> EQUALS SIGN u'>' # 0x003e -> GREATER-THAN SIGN u'?' # 0x003f -> QUESTION MARK u'@' # 0x0040 -> COMMERCIAL AT u'A' # 0x0041 -> LATIN CAPITAL LETTER A u'B' # 0x0042 -> LATIN CAPITAL LETTER B u'C' # 0x0043 -> LATIN CAPITAL LETTER C u'D' # 0x0044 -> LATIN CAPITAL LETTER D u'E' # 0x0045 -> LATIN CAPITAL LETTER E u'F' # 0x0046 -> LATIN CAPITAL LETTER F u'G' # 0x0047 -> LATIN CAPITAL LETTER G u'H' # 0x0048 -> LATIN CAPITAL LETTER H u'I' # 0x0049 -> LATIN CAPITAL LETTER I u'J' # 0x004a -> LATIN CAPITAL LETTER J u'K' # 0x004b -> LATIN CAPITAL LETTER K u'L' # 0x004c -> LATIN CAPITAL LETTER L u'M' # 0x004d -> LATIN CAPITAL LETTER M u'N' # 0x004e -> LATIN CAPITAL LETTER N u'O' # 0x004f -> LATIN CAPITAL LETTER O u'P' # 0x0050 -> LATIN CAPITAL LETTER P u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q u'R' # 0x0052 -> LATIN CAPITAL LETTER R u'S' # 0x0053 -> LATIN CAPITAL LETTER S u'T' # 0x0054 -> LATIN CAPITAL LETTER T u'U' # 0x0055 -> LATIN CAPITAL LETTER U u'V' # 0x0056 -> LATIN CAPITAL LETTER V u'W' # 0x0057 -> LATIN CAPITAL LETTER W u'X' # 0x0058 -> LATIN CAPITAL LETTER X u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y u'Z' # 0x005a -> LATIN CAPITAL LETTER Z u'[' # 0x005b -> LEFT SQUARE BRACKET u'\\' # 0x005c -> REVERSE SOLIDUS u']' # 0x005d -> RIGHT SQUARE BRACKET u'^' # 0x005e -> CIRCUMFLEX ACCENT u'_' # 0x005f -> LOW LINE u'`' # 0x0060 -> GRAVE ACCENT u'a' # 0x0061 -> LATIN SMALL LETTER A u'b' # 0x0062 -> LATIN SMALL LETTER B u'c' # 0x0063 -> LATIN SMALL LETTER C u'd' # 0x0064 -> LATIN SMALL LETTER D u'e' # 0x0065 -> LATIN SMALL LETTER E u'f' # 0x0066 -> LATIN SMALL LETTER F u'g' # 0x0067 -> LATIN SMALL LETTER G u'h' # 0x0068 -> LATIN SMALL LETTER H u'i' # 0x0069 -> LATIN SMALL LETTER I u'j' # 0x006a -> LATIN SMALL LETTER J u'k' # 0x006b -> LATIN SMALL LETTER K u'l' # 0x006c -> LATIN SMALL LETTER L u'm' # 0x006d -> LATIN SMALL LETTER M u'n' # 0x006e -> LATIN SMALL LETTER N u'o' # 0x006f -> LATIN SMALL LETTER O u'p' # 0x0070 -> LATIN SMALL LETTER P u'q' # 0x0071 -> LATIN SMALL LETTER Q u'r' # 0x0072 -> LATIN SMALL LETTER R u's' # 0x0073 -> LATIN SMALL LETTER S u't' # 0x0074 -> LATIN SMALL LETTER T u'u' # 0x0075 -> LATIN SMALL LETTER U u'v' # 0x0076 -> LATIN SMALL LETTER V u'w' # 0x0077 -> LATIN SMALL LETTER W u'x' # 0x0078 -> LATIN SMALL LETTER X u'y' # 0x0079 -> LATIN SMALL LETTER Y u'z' # 0x007a -> LATIN SMALL LETTER Z u'{' # 0x007b -> LEFT CURLY BRACKET u'|' # 0x007c -> VERTICAL LINE u'}' # 0x007d -> RIGHT CURLY BRACKET u'~' # 0x007e -> TILDE u'\x7f' # 0x007f -> DELETE u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX u'\xe3' # 0x0084 -> LATIN SMALL LETTER A WITH TILDE u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE u'\xc1' # 0x0086 -> LATIN CAPITAL LETTER A WITH ACUTE u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX u'\xca' # 0x0089 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE u'\xcd' # 0x008b -> LATIN CAPITAL LETTER I WITH ACUTE u'\xd4' # 0x008c -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE u'\xc3' # 0x008e -> LATIN CAPITAL LETTER A WITH TILDE u'\xc2' # 0x008f -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE u'\xc0' # 0x0091 -> LATIN CAPITAL LETTER A WITH GRAVE u'\xc8' # 0x0092 -> LATIN CAPITAL LETTER E WITH GRAVE u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX u'\xf5' # 0x0094 -> LATIN SMALL LETTER O WITH TILDE u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE u'\xda' # 0x0096 -> LATIN CAPITAL LETTER U WITH ACUTE u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE u'\xcc' # 0x0098 -> LATIN CAPITAL LETTER I WITH GRAVE u'\xd5' # 0x0099 -> LATIN CAPITAL LETTER O WITH TILDE u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS u'\xa2' # 0x009b -> CENT SIGN u'\xa3' # 0x009c -> POUND SIGN u'\xd9' # 0x009d -> LATIN CAPITAL LETTER U WITH GRAVE u'\u20a7' # 0x009e -> PESETA SIGN u'\xd3' # 0x009f -> LATIN CAPITAL LETTER O WITH ACUTE u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK u'\xd2' # 0x00a9 -> LATIN CAPITAL LETTER O WITH GRAVE u'\xac' # 0x00aa -> NOT SIGN u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK u'\u2591' # 0x00b0 -> LIGHT SHADE u'\u2592' # 0x00b1 -> MEDIUM SHADE u'\u2593' # 0x00b2 -> DARK SHADE u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT u'\u2588' # 0x00db -> FULL BLOCK u'\u2584' # 0x00dc -> LOWER HALF BLOCK u'\u258c' # 0x00dd -> LEFT HALF BLOCK u'\u2590' # 0x00de -> RIGHT HALF BLOCK u'\u2580' # 0x00df -> UPPER HALF BLOCK u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA u'\xb5' # 0x00e6 -> MICRO SIGN u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA u'\u221e' # 0x00ec -> INFINITY u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON u'\u2229' # 0x00ef -> INTERSECTION u'\u2261' # 0x00f0 -> IDENTICAL TO u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL u'\xf7' # 0x00f6 -> DIVISION SIGN u'\u2248' # 0x00f7 -> ALMOST EQUAL TO u'\xb0' # 0x00f8 -> DEGREE SIGN u'\u2219' # 0x00f9 -> BULLET OPERATOR u'\xb7' # 0x00fa -> MIDDLE DOT u'\u221a' # 0x00fb -> SQUARE ROOT u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N u'\xb2' # 0x00fd -> SUPERSCRIPT TWO u'\u25a0' # 0x00fe -> BLACK SQUARE u'\xa0' # 0x00ff -> NO-BREAK SPACE ) ### Encoding Map encoding_map = { 0x0000: 0x0000, # NULL 0x0001: 0x0001, # START OF HEADING 0x0002: 0x0002, # START OF TEXT 0x0003: 0x0003, # END OF TEXT 0x0004: 0x0004, # END OF TRANSMISSION 0x0005: 0x0005, # ENQUIRY 0x0006: 0x0006, # ACKNOWLEDGE 0x0007: 0x0007, # BELL 0x0008: 0x0008, # BACKSPACE 0x0009: 0x0009, # HORIZONTAL TABULATION 0x000a: 0x000a, # LINE FEED 0x000b: 0x000b, # VERTICAL TABULATION 0x000c: 0x000c, # FORM FEED 0x000d: 0x000d, # CARRIAGE RETURN 0x000e: 0x000e, # SHIFT OUT 0x000f: 0x000f, # SHIFT IN 0x0010: 0x0010, # DATA LINK ESCAPE 0x0011: 0x0011, # DEVICE CONTROL ONE 0x0012: 0x0012, # DEVICE CONTROL TWO 0x0013: 0x0013, # DEVICE CONTROL THREE 0x0014: 0x0014, # DEVICE CONTROL FOUR 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE 0x0016: 0x0016, # SYNCHRONOUS IDLE 0x0017: 0x0017, # END OF TRANSMISSION BLOCK 0x0018: 0x0018, # CANCEL 0x0019: 0x0019, # END OF MEDIUM 0x001a: 0x001a, # SUBSTITUTE 0x001b: 0x001b, # ESCAPE 0x001c: 0x001c, # FILE SEPARATOR 0x001d: 0x001d, # GROUP SEPARATOR 0x001e: 0x001e, # RECORD SEPARATOR 0x001f: 0x001f, # UNIT SEPARATOR 0x0020: 0x0020, # SPACE 0x0021: 0x0021, # EXCLAMATION MARK 0x0022: 0x0022, # QUOTATION MARK 0x0023: 0x0023, # NUMBER SIGN 0x0024: 0x0024, # DOLLAR SIGN 0x0025: 0x0025, # PERCENT SIGN 0x0026: 0x0026, # AMPERSAND 0x0027: 0x0027, # APOSTROPHE 0x0028: 0x0028, # LEFT PARENTHESIS 0x0029: 0x0029, # RIGHT PARENTHESIS 0x002a: 0x002a, # ASTERISK 0x002b: 0x002b, # PLUS SIGN 0x002c: 0x002c, # COMMA 0x002d: 0x002d, # HYPHEN-MINUS 0x002e: 0x002e, # FULL STOP 0x002f: 0x002f, # SOLIDUS 0x0030: 0x0030, # DIGIT ZERO 0x0031: 0x0031, # DIGIT ONE 0x0032: 0x0032, # DIGIT TWO 0x0033: 0x0033, # DIGIT THREE 0x0034: 0x0034, # DIGIT FOUR 0x0035: 0x0035, # DIGIT FIVE 0x0036: 0x0036, # DIGIT SIX 0x0037: 0x0037, # DIGIT SEVEN 0x0038: 0x0038, # DIGIT EIGHT 0x0039: 0x0039, # DIGIT NINE 0x003a: 0x003a, # COLON 0x003b: 0x003b, # SEMICOLON 0x003c: 0x003c, # LESS-THAN SIGN 0x003d: 0x003d, # EQUALS SIGN 0x003e: 0x003e, # GREATER-THAN SIGN 0x003f: 0x003f, # QUESTION MARK 0x0040: 0x0040, # COMMERCIAL AT 0x0041: 0x0041, # LATIN CAPITAL LETTER A 0x0042: 0x0042, # LATIN CAPITAL LETTER B 0x0043: 0x0043, # LATIN CAPITAL LETTER C 0x0044: 0x0044, # LATIN CAPITAL LETTER D 0x0045: 0x0045, # LATIN CAPITAL LETTER E 0x0046: 0x0046, # LATIN CAPITAL LETTER F 0x0047: 0x0047, # LATIN CAPITAL LETTER G 0x0048: 0x0048, # LATIN CAPITAL LETTER H 0x0049: 0x0049, # LATIN CAPITAL LETTER I 0x004a: 0x004a, # LATIN CAPITAL LETTER J 0x004b: 0x004b, # LATIN CAPITAL LETTER K 0x004c: 0x004c, # LATIN CAPITAL LETTER L 0x004d: 0x004d, # LATIN CAPITAL LETTER M 0x004e: 0x004e, # LATIN CAPITAL LETTER N 0x004f: 0x004f, # LATIN CAPITAL LETTER O 0x0050: 0x0050, # LATIN CAPITAL LETTER P 0x0051: 0x0051, # LATIN CAPITAL LETTER Q 0x0052: 0x0052, # LATIN CAPITAL LETTER R 0x0053: 0x0053, # LATIN CAPITAL LETTER S 0x0054: 0x0054, # LATIN CAPITAL LETTER T 0x0055: 0x0055, # LATIN CAPITAL LETTER U 0x0056: 0x0056, # LATIN CAPITAL LETTER V 0x0057: 0x0057, # LATIN CAPITAL LETTER W 0x0058: 0x0058, # LATIN CAPITAL LETTER X 0x0059: 0x0059, # LATIN CAPITAL LETTER Y 0x005a: 0x005a, # LATIN CAPITAL LETTER Z 0x005b: 0x005b, # LEFT SQUARE BRACKET 0x005c: 0x005c, # REVERSE SOLIDUS 0x005d: 0x005d, # RIGHT SQUARE BRACKET 0x005e: 0x005e, # CIRCUMFLEX ACCENT 0x005f: 0x005f, # LOW LINE 0x0060: 0x0060, # GRAVE ACCENT 0x0061: 0x0061, # LATIN SMALL LETTER A 0x0062: 0x0062, # LATIN SMALL LETTER B 0x0063: 0x0063, # LATIN SMALL LETTER C 0x0064: 0x0064, # LATIN SMALL LETTER D 0x0065: 0x0065, # LATIN SMALL LETTER E 0x0066: 0x0066, # LATIN SMALL LETTER F 0x0067: 0x0067, # LATIN SMALL LETTER G 0x0068: 0x0068, # LATIN SMALL LETTER H 0x0069: 0x0069, # LATIN SMALL LETTER I 0x006a: 0x006a, # LATIN SMALL LETTER J 0x006b: 0x006b, # LATIN SMALL LETTER K 0x006c: 0x006c, # LATIN SMALL LETTER L 0x006d: 0x006d, # LATIN SMALL LETTER M 0x006e: 0x006e, # LATIN SMALL LETTER N 0x006f: 0x006f, # LATIN SMALL LETTER O 0x0070: 0x0070, # LATIN SMALL LETTER P 0x0071: 0x0071, # LATIN SMALL LETTER Q 0x0072: 0x0072, # LATIN SMALL LETTER R 0x0073: 0x0073, # LATIN SMALL LETTER S 0x0074: 0x0074, # LATIN SMALL LETTER T 0x0075: 0x0075, # LATIN SMALL LETTER U 0x0076: 0x0076, # LATIN SMALL LETTER V 0x0077: 0x0077, # LATIN SMALL LETTER W 0x0078: 0x0078, # LATIN SMALL LETTER X 0x0079: 0x0079, # LATIN SMALL LETTER Y 0x007a: 0x007a, # LATIN SMALL LETTER Z 0x007b: 0x007b, # LEFT CURLY BRACKET 0x007c: 0x007c, # VERTICAL LINE 0x007d: 0x007d, # RIGHT CURLY BRACKET 0x007e: 0x007e, # TILDE 0x007f: 0x007f, # DELETE 0x00a0: 0x00ff, # NO-BREAK SPACE 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK 0x00a2: 0x009b, # CENT SIGN 0x00a3: 0x009c, # POUND SIGN 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00ac: 0x00aa, # NOT SIGN 0x00b0: 0x00f8, # DEGREE SIGN 0x00b1: 0x00f1, # PLUS-MINUS SIGN 0x00b2: 0x00fd, # SUPERSCRIPT TWO 0x00b5: 0x00e6, # MICRO SIGN 0x00b7: 0x00fa, # MIDDLE DOT 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF 0x00bf: 0x00a8, # INVERTED QUESTION MARK 0x00c0: 0x0091, # LATIN CAPITAL LETTER A WITH GRAVE 0x00c1: 0x0086, # LATIN CAPITAL LETTER A WITH ACUTE 0x00c2: 0x008f, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX 0x00c3: 0x008e, # LATIN CAPITAL LETTER A WITH TILDE 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA 0x00c8: 0x0092, # LATIN CAPITAL LETTER E WITH GRAVE 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE 0x00ca: 0x0089, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX 0x00cc: 0x0098, # LATIN CAPITAL LETTER I WITH GRAVE 0x00cd: 0x008b, # LATIN CAPITAL LETTER I WITH ACUTE 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE 0x00d2: 0x00a9, # LATIN CAPITAL LETTER O WITH GRAVE 0x00d3: 0x009f, # LATIN CAPITAL LETTER O WITH ACUTE 0x00d4: 0x008c, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX 0x00d5: 0x0099, # LATIN CAPITAL LETTER O WITH TILDE 0x00d9: 0x009d, # LATIN CAPITAL LETTER U WITH GRAVE 0x00da: 0x0096, # LATIN CAPITAL LETTER U WITH ACUTE 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX 0x00e3: 0x0084, # LATIN SMALL LETTER A WITH TILDE 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX 0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX 0x00f5: 0x0094, # LATIN SMALL LETTER O WITH TILDE 0x00f7: 0x00f6, # DIVISION SIGN 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON 0x03c0: 0x00e3, # GREEK SMALL LETTER PI 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N 0x20a7: 0x009e, # PESETA SIGN 0x2219: 0x00f9, # BULLET OPERATOR 0x221a: 0x00fb, # SQUARE ROOT 0x221e: 0x00ec, # INFINITY 0x2229: 0x00ef, # INTERSECTION 0x2248: 0x00f7, # ALMOST EQUAL TO 0x2261: 0x00f0, # IDENTICAL TO 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO 0x2320: 0x00f4, # TOP HALF INTEGRAL 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL 0x2580: 0x00df, # UPPER HALF BLOCK 0x2584: 0x00dc, # LOWER HALF BLOCK 0x2588: 0x00db, # FULL BLOCK 0x258c: 0x00dd, # LEFT HALF BLOCK 0x2590: 0x00de, # RIGHT HALF BLOCK 0x2591: 0x00b0, # LIGHT SHADE 0x2592: 0x00b1, # MEDIUM SHADE 0x2593: 0x00b2, # DARK SHADE 0x25a0: 0x00fe, # BLACK SQUARE } gbk.py 0000644 00000001767 15053241622 0005674 0 ustar 00 # # gbk.py: Python Unicode Codec for GBK # # Written by Hye-Shik Chang <perky@FreeBSD.org> # import _codecs_cn, codecs import _multibytecodec as mbc codec = _codecs_cn.getcodec('gbk') class Codec(codecs.Codec): encode = codec.encode decode = codec.decode class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, codecs.IncrementalEncoder): codec = codec class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, codecs.IncrementalDecoder): codec = codec class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): codec = codec class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): codec = codec def getregentry(): return codecs.CodecInfo( name='gbk', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) cp850.py 0000644 00000103071 15053241622 0005757 0 ustar 00 """ Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP850.TXT' with gencodec.py. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.charmap_encode(input,self.errors,encoding_map)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.charmap_decode(input,self.errors,decoding_table)[0] class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='cp850', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE 0x009c: 0x00a3, # POUND SIGN 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE 0x009e: 0x00d7, # MULTIPLICATION SIGN 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR 0x00a8: 0x00bf, # INVERTED QUESTION MARK 0x00a9: 0x00ae, # REGISTERED SIGN 0x00aa: 0x00ac, # NOT SIGN 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00b0: 0x2591, # LIGHT SHADE 0x00b1: 0x2592, # MEDIUM SHADE 0x00b2: 0x2593, # DARK SHADE 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX 0x00b7: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE 0x00b8: 0x00a9, # COPYRIGHT SIGN 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT 0x00bd: 0x00a2, # CENT SIGN 0x00be: 0x00a5, # YEN SIGN 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL 0x00c6: 0x00e3, # LATIN SMALL LETTER A WITH TILDE 0x00c7: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL 0x00cf: 0x00a4, # CURRENCY SIGN 0x00d0: 0x00f0, # LATIN SMALL LETTER ETH 0x00d1: 0x00d0, # LATIN CAPITAL LETTER ETH 0x00d2: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS 0x00d4: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE 0x00d5: 0x0131, # LATIN SMALL LETTER DOTLESS I 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX 0x00d8: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT 0x00db: 0x2588, # FULL BLOCK 0x00dc: 0x2584, # LOWER HALF BLOCK 0x00dd: 0x00a6, # BROKEN BAR 0x00de: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE 0x00df: 0x2580, # UPPER HALF BLOCK 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX 0x00e3: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE 0x00e6: 0x00b5, # MICRO SIGN 0x00e7: 0x00fe, # LATIN SMALL LETTER THORN 0x00e8: 0x00de, # LATIN CAPITAL LETTER THORN 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE 0x00ea: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX 0x00eb: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE 0x00ec: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE 0x00ed: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE 0x00ee: 0x00af, # MACRON 0x00ef: 0x00b4, # ACUTE ACCENT 0x00f0: 0x00ad, # SOFT HYPHEN 0x00f1: 0x00b1, # PLUS-MINUS SIGN 0x00f2: 0x2017, # DOUBLE LOW LINE 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS 0x00f4: 0x00b6, # PILCROW SIGN 0x00f5: 0x00a7, # SECTION SIGN 0x00f6: 0x00f7, # DIVISION SIGN 0x00f7: 0x00b8, # CEDILLA 0x00f8: 0x00b0, # DEGREE SIGN 0x00f9: 0x00a8, # DIAERESIS 0x00fa: 0x00b7, # MIDDLE DOT 0x00fb: 0x00b9, # SUPERSCRIPT ONE 0x00fc: 0x00b3, # SUPERSCRIPT THREE 0x00fd: 0x00b2, # SUPERSCRIPT TWO 0x00fe: 0x25a0, # BLACK SQUARE 0x00ff: 0x00a0, # NO-BREAK SPACE }) ### Decoding Table decoding_table = ( u'\x00' # 0x0000 -> NULL u'\x01' # 0x0001 -> START OF HEADING u'\x02' # 0x0002 -> START OF TEXT u'\x03' # 0x0003 -> END OF TEXT u'\x04' # 0x0004 -> END OF TRANSMISSION u'\x05' # 0x0005 -> ENQUIRY u'\x06' # 0x0006 -> ACKNOWLEDGE u'\x07' # 0x0007 -> BELL u'\x08' # 0x0008 -> BACKSPACE u'\t' # 0x0009 -> HORIZONTAL TABULATION u'\n' # 0x000a -> LINE FEED u'\x0b' # 0x000b -> VERTICAL TABULATION u'\x0c' # 0x000c -> FORM FEED u'\r' # 0x000d -> CARRIAGE RETURN u'\x0e' # 0x000e -> SHIFT OUT u'\x0f' # 0x000f -> SHIFT IN u'\x10' # 0x0010 -> DATA LINK ESCAPE u'\x11' # 0x0011 -> DEVICE CONTROL ONE u'\x12' # 0x0012 -> DEVICE CONTROL TWO u'\x13' # 0x0013 -> DEVICE CONTROL THREE u'\x14' # 0x0014 -> DEVICE CONTROL FOUR u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE u'\x16' # 0x0016 -> SYNCHRONOUS IDLE u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK u'\x18' # 0x0018 -> CANCEL u'\x19' # 0x0019 -> END OF MEDIUM u'\x1a' # 0x001a -> SUBSTITUTE u'\x1b' # 0x001b -> ESCAPE u'\x1c' # 0x001c -> FILE SEPARATOR u'\x1d' # 0x001d -> GROUP SEPARATOR u'\x1e' # 0x001e -> RECORD SEPARATOR u'\x1f' # 0x001f -> UNIT SEPARATOR u' ' # 0x0020 -> SPACE u'!' # 0x0021 -> EXCLAMATION MARK u'"' # 0x0022 -> QUOTATION MARK u'#' # 0x0023 -> NUMBER SIGN u'$' # 0x0024 -> DOLLAR SIGN u'%' # 0x0025 -> PERCENT SIGN u'&' # 0x0026 -> AMPERSAND u"'" # 0x0027 -> APOSTROPHE u'(' # 0x0028 -> LEFT PARENTHESIS u')' # 0x0029 -> RIGHT PARENTHESIS u'*' # 0x002a -> ASTERISK u'+' # 0x002b -> PLUS SIGN u',' # 0x002c -> COMMA u'-' # 0x002d -> HYPHEN-MINUS u'.' # 0x002e -> FULL STOP u'/' # 0x002f -> SOLIDUS u'0' # 0x0030 -> DIGIT ZERO u'1' # 0x0031 -> DIGIT ONE u'2' # 0x0032 -> DIGIT TWO u'3' # 0x0033 -> DIGIT THREE u'4' # 0x0034 -> DIGIT FOUR u'5' # 0x0035 -> DIGIT FIVE u'6' # 0x0036 -> DIGIT SIX u'7' # 0x0037 -> DIGIT SEVEN u'8' # 0x0038 -> DIGIT EIGHT u'9' # 0x0039 -> DIGIT NINE u':' # 0x003a -> COLON u';' # 0x003b -> SEMICOLON u'<' # 0x003c -> LESS-THAN SIGN u'=' # 0x003d -> EQUALS SIGN u'>' # 0x003e -> GREATER-THAN SIGN u'?' # 0x003f -> QUESTION MARK u'@' # 0x0040 -> COMMERCIAL AT u'A' # 0x0041 -> LATIN CAPITAL LETTER A u'B' # 0x0042 -> LATIN CAPITAL LETTER B u'C' # 0x0043 -> LATIN CAPITAL LETTER C u'D' # 0x0044 -> LATIN CAPITAL LETTER D u'E' # 0x0045 -> LATIN CAPITAL LETTER E u'F' # 0x0046 -> LATIN CAPITAL LETTER F u'G' # 0x0047 -> LATIN CAPITAL LETTER G u'H' # 0x0048 -> LATIN CAPITAL LETTER H u'I' # 0x0049 -> LATIN CAPITAL LETTER I u'J' # 0x004a -> LATIN CAPITAL LETTER J u'K' # 0x004b -> LATIN CAPITAL LETTER K u'L' # 0x004c -> LATIN CAPITAL LETTER L u'M' # 0x004d -> LATIN CAPITAL LETTER M u'N' # 0x004e -> LATIN CAPITAL LETTER N u'O' # 0x004f -> LATIN CAPITAL LETTER O u'P' # 0x0050 -> LATIN CAPITAL LETTER P u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q u'R' # 0x0052 -> LATIN CAPITAL LETTER R u'S' # 0x0053 -> LATIN CAPITAL LETTER S u'T' # 0x0054 -> LATIN CAPITAL LETTER T u'U' # 0x0055 -> LATIN CAPITAL LETTER U u'V' # 0x0056 -> LATIN CAPITAL LETTER V u'W' # 0x0057 -> LATIN CAPITAL LETTER W u'X' # 0x0058 -> LATIN CAPITAL LETTER X u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y u'Z' # 0x005a -> LATIN CAPITAL LETTER Z u'[' # 0x005b -> LEFT SQUARE BRACKET u'\\' # 0x005c -> REVERSE SOLIDUS u']' # 0x005d -> RIGHT SQUARE BRACKET u'^' # 0x005e -> CIRCUMFLEX ACCENT u'_' # 0x005f -> LOW LINE u'`' # 0x0060 -> GRAVE ACCENT u'a' # 0x0061 -> LATIN SMALL LETTER A u'b' # 0x0062 -> LATIN SMALL LETTER B u'c' # 0x0063 -> LATIN SMALL LETTER C u'd' # 0x0064 -> LATIN SMALL LETTER D u'e' # 0x0065 -> LATIN SMALL LETTER E u'f' # 0x0066 -> LATIN SMALL LETTER F u'g' # 0x0067 -> LATIN SMALL LETTER G u'h' # 0x0068 -> LATIN SMALL LETTER H u'i' # 0x0069 -> LATIN SMALL LETTER I u'j' # 0x006a -> LATIN SMALL LETTER J u'k' # 0x006b -> LATIN SMALL LETTER K u'l' # 0x006c -> LATIN SMALL LETTER L u'm' # 0x006d -> LATIN SMALL LETTER M u'n' # 0x006e -> LATIN SMALL LETTER N u'o' # 0x006f -> LATIN SMALL LETTER O u'p' # 0x0070 -> LATIN SMALL LETTER P u'q' # 0x0071 -> LATIN SMALL LETTER Q u'r' # 0x0072 -> LATIN SMALL LETTER R u's' # 0x0073 -> LATIN SMALL LETTER S u't' # 0x0074 -> LATIN SMALL LETTER T u'u' # 0x0075 -> LATIN SMALL LETTER U u'v' # 0x0076 -> LATIN SMALL LETTER V u'w' # 0x0077 -> LATIN SMALL LETTER W u'x' # 0x0078 -> LATIN SMALL LETTER X u'y' # 0x0079 -> LATIN SMALL LETTER Y u'z' # 0x007a -> LATIN SMALL LETTER Z u'{' # 0x007b -> LEFT CURLY BRACKET u'|' # 0x007c -> VERTICAL LINE u'}' # 0x007d -> RIGHT CURLY BRACKET u'~' # 0x007e -> TILDE u'\x7f' # 0x007f -> DELETE u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE u'\xff' # 0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE u'\xa3' # 0x009c -> POUND SIGN u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE u'\xd7' # 0x009e -> MULTIPLICATION SIGN u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK u'\xae' # 0x00a9 -> REGISTERED SIGN u'\xac' # 0x00aa -> NOT SIGN u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK u'\u2591' # 0x00b0 -> LIGHT SHADE u'\u2592' # 0x00b1 -> MEDIUM SHADE u'\u2593' # 0x00b2 -> DARK SHADE u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT u'\xc1' # 0x00b5 -> LATIN CAPITAL LETTER A WITH ACUTE u'\xc2' # 0x00b6 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX u'\xc0' # 0x00b7 -> LATIN CAPITAL LETTER A WITH GRAVE u'\xa9' # 0x00b8 -> COPYRIGHT SIGN u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT u'\xa2' # 0x00bd -> CENT SIGN u'\xa5' # 0x00be -> YEN SIGN u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL u'\xe3' # 0x00c6 -> LATIN SMALL LETTER A WITH TILDE u'\xc3' # 0x00c7 -> LATIN CAPITAL LETTER A WITH TILDE u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL u'\xa4' # 0x00cf -> CURRENCY SIGN u'\xf0' # 0x00d0 -> LATIN SMALL LETTER ETH u'\xd0' # 0x00d1 -> LATIN CAPITAL LETTER ETH u'\xca' # 0x00d2 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX u'\xcb' # 0x00d3 -> LATIN CAPITAL LETTER E WITH DIAERESIS u'\xc8' # 0x00d4 -> LATIN CAPITAL LETTER E WITH GRAVE u'\u0131' # 0x00d5 -> LATIN SMALL LETTER DOTLESS I u'\xcd' # 0x00d6 -> LATIN CAPITAL LETTER I WITH ACUTE u'\xce' # 0x00d7 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX u'\xcf' # 0x00d8 -> LATIN CAPITAL LETTER I WITH DIAERESIS u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT u'\u2588' # 0x00db -> FULL BLOCK u'\u2584' # 0x00dc -> LOWER HALF BLOCK u'\xa6' # 0x00dd -> BROKEN BAR u'\xcc' # 0x00de -> LATIN CAPITAL LETTER I WITH GRAVE u'\u2580' # 0x00df -> UPPER HALF BLOCK u'\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S u'\xd4' # 0x00e2 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX u'\xd2' # 0x00e3 -> LATIN CAPITAL LETTER O WITH GRAVE u'\xf5' # 0x00e4 -> LATIN SMALL LETTER O WITH TILDE u'\xd5' # 0x00e5 -> LATIN CAPITAL LETTER O WITH TILDE u'\xb5' # 0x00e6 -> MICRO SIGN u'\xfe' # 0x00e7 -> LATIN SMALL LETTER THORN u'\xde' # 0x00e8 -> LATIN CAPITAL LETTER THORN u'\xda' # 0x00e9 -> LATIN CAPITAL LETTER U WITH ACUTE u'\xdb' # 0x00ea -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX u'\xd9' # 0x00eb -> LATIN CAPITAL LETTER U WITH GRAVE u'\xfd' # 0x00ec -> LATIN SMALL LETTER Y WITH ACUTE u'\xdd' # 0x00ed -> LATIN CAPITAL LETTER Y WITH ACUTE u'\xaf' # 0x00ee -> MACRON u'\xb4' # 0x00ef -> ACUTE ACCENT u'\xad' # 0x00f0 -> SOFT HYPHEN u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN u'\u2017' # 0x00f2 -> DOUBLE LOW LINE u'\xbe' # 0x00f3 -> VULGAR FRACTION THREE QUARTERS u'\xb6' # 0x00f4 -> PILCROW SIGN u'\xa7' # 0x00f5 -> SECTION SIGN u'\xf7' # 0x00f6 -> DIVISION SIGN u'\xb8' # 0x00f7 -> CEDILLA u'\xb0' # 0x00f8 -> DEGREE SIGN u'\xa8' # 0x00f9 -> DIAERESIS u'\xb7' # 0x00fa -> MIDDLE DOT u'\xb9' # 0x00fb -> SUPERSCRIPT ONE u'\xb3' # 0x00fc -> SUPERSCRIPT THREE u'\xb2' # 0x00fd -> SUPERSCRIPT TWO u'\u25a0' # 0x00fe -> BLACK SQUARE u'\xa0' # 0x00ff -> NO-BREAK SPACE ) ### Encoding Map encoding_map = { 0x0000: 0x0000, # NULL 0x0001: 0x0001, # START OF HEADING 0x0002: 0x0002, # START OF TEXT 0x0003: 0x0003, # END OF TEXT 0x0004: 0x0004, # END OF TRANSMISSION 0x0005: 0x0005, # ENQUIRY 0x0006: 0x0006, # ACKNOWLEDGE 0x0007: 0x0007, # BELL 0x0008: 0x0008, # BACKSPACE 0x0009: 0x0009, # HORIZONTAL TABULATION 0x000a: 0x000a, # LINE FEED 0x000b: 0x000b, # VERTICAL TABULATION 0x000c: 0x000c, # FORM FEED 0x000d: 0x000d, # CARRIAGE RETURN 0x000e: 0x000e, # SHIFT OUT 0x000f: 0x000f, # SHIFT IN 0x0010: 0x0010, # DATA LINK ESCAPE 0x0011: 0x0011, # DEVICE CONTROL ONE 0x0012: 0x0012, # DEVICE CONTROL TWO 0x0013: 0x0013, # DEVICE CONTROL THREE 0x0014: 0x0014, # DEVICE CONTROL FOUR 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE 0x0016: 0x0016, # SYNCHRONOUS IDLE 0x0017: 0x0017, # END OF TRANSMISSION BLOCK 0x0018: 0x0018, # CANCEL 0x0019: 0x0019, # END OF MEDIUM 0x001a: 0x001a, # SUBSTITUTE 0x001b: 0x001b, # ESCAPE 0x001c: 0x001c, # FILE SEPARATOR 0x001d: 0x001d, # GROUP SEPARATOR 0x001e: 0x001e, # RECORD SEPARATOR 0x001f: 0x001f, # UNIT SEPARATOR 0x0020: 0x0020, # SPACE 0x0021: 0x0021, # EXCLAMATION MARK 0x0022: 0x0022, # QUOTATION MARK 0x0023: 0x0023, # NUMBER SIGN 0x0024: 0x0024, # DOLLAR SIGN 0x0025: 0x0025, # PERCENT SIGN 0x0026: 0x0026, # AMPERSAND 0x0027: 0x0027, # APOSTROPHE 0x0028: 0x0028, # LEFT PARENTHESIS 0x0029: 0x0029, # RIGHT PARENTHESIS 0x002a: 0x002a, # ASTERISK 0x002b: 0x002b, # PLUS SIGN 0x002c: 0x002c, # COMMA 0x002d: 0x002d, # HYPHEN-MINUS 0x002e: 0x002e, # FULL STOP 0x002f: 0x002f, # SOLIDUS 0x0030: 0x0030, # DIGIT ZERO 0x0031: 0x0031, # DIGIT ONE 0x0032: 0x0032, # DIGIT TWO 0x0033: 0x0033, # DIGIT THREE 0x0034: 0x0034, # DIGIT FOUR 0x0035: 0x0035, # DIGIT FIVE 0x0036: 0x0036, # DIGIT SIX 0x0037: 0x0037, # DIGIT SEVEN 0x0038: 0x0038, # DIGIT EIGHT 0x0039: 0x0039, # DIGIT NINE 0x003a: 0x003a, # COLON 0x003b: 0x003b, # SEMICOLON 0x003c: 0x003c, # LESS-THAN SIGN 0x003d: 0x003d, # EQUALS SIGN 0x003e: 0x003e, # GREATER-THAN SIGN 0x003f: 0x003f, # QUESTION MARK 0x0040: 0x0040, # COMMERCIAL AT 0x0041: 0x0041, # LATIN CAPITAL LETTER A 0x0042: 0x0042, # LATIN CAPITAL LETTER B 0x0043: 0x0043, # LATIN CAPITAL LETTER C 0x0044: 0x0044, # LATIN CAPITAL LETTER D 0x0045: 0x0045, # LATIN CAPITAL LETTER E 0x0046: 0x0046, # LATIN CAPITAL LETTER F 0x0047: 0x0047, # LATIN CAPITAL LETTER G 0x0048: 0x0048, # LATIN CAPITAL LETTER H 0x0049: 0x0049, # LATIN CAPITAL LETTER I 0x004a: 0x004a, # LATIN CAPITAL LETTER J 0x004b: 0x004b, # LATIN CAPITAL LETTER K 0x004c: 0x004c, # LATIN CAPITAL LETTER L 0x004d: 0x004d, # LATIN CAPITAL LETTER M 0x004e: 0x004e, # LATIN CAPITAL LETTER N 0x004f: 0x004f, # LATIN CAPITAL LETTER O 0x0050: 0x0050, # LATIN CAPITAL LETTER P 0x0051: 0x0051, # LATIN CAPITAL LETTER Q 0x0052: 0x0052, # LATIN CAPITAL LETTER R 0x0053: 0x0053, # LATIN CAPITAL LETTER S 0x0054: 0x0054, # LATIN CAPITAL LETTER T 0x0055: 0x0055, # LATIN CAPITAL LETTER U 0x0056: 0x0056, # LATIN CAPITAL LETTER V 0x0057: 0x0057, # LATIN CAPITAL LETTER W 0x0058: 0x0058, # LATIN CAPITAL LETTER X 0x0059: 0x0059, # LATIN CAPITAL LETTER Y 0x005a: 0x005a, # LATIN CAPITAL LETTER Z 0x005b: 0x005b, # LEFT SQUARE BRACKET 0x005c: 0x005c, # REVERSE SOLIDUS 0x005d: 0x005d, # RIGHT SQUARE BRACKET 0x005e: 0x005e, # CIRCUMFLEX ACCENT 0x005f: 0x005f, # LOW LINE 0x0060: 0x0060, # GRAVE ACCENT 0x0061: 0x0061, # LATIN SMALL LETTER A 0x0062: 0x0062, # LATIN SMALL LETTER B 0x0063: 0x0063, # LATIN SMALL LETTER C 0x0064: 0x0064, # LATIN SMALL LETTER D 0x0065: 0x0065, # LATIN SMALL LETTER E 0x0066: 0x0066, # LATIN SMALL LETTER F 0x0067: 0x0067, # LATIN SMALL LETTER G 0x0068: 0x0068, # LATIN SMALL LETTER H 0x0069: 0x0069, # LATIN SMALL LETTER I 0x006a: 0x006a, # LATIN SMALL LETTER J 0x006b: 0x006b, # LATIN SMALL LETTER K 0x006c: 0x006c, # LATIN SMALL LETTER L 0x006d: 0x006d, # LATIN SMALL LETTER M 0x006e: 0x006e, # LATIN SMALL LETTER N 0x006f: 0x006f, # LATIN SMALL LETTER O 0x0070: 0x0070, # LATIN SMALL LETTER P 0x0071: 0x0071, # LATIN SMALL LETTER Q 0x0072: 0x0072, # LATIN SMALL LETTER R 0x0073: 0x0073, # LATIN SMALL LETTER S 0x0074: 0x0074, # LATIN SMALL LETTER T 0x0075: 0x0075, # LATIN SMALL LETTER U 0x0076: 0x0076, # LATIN SMALL LETTER V 0x0077: 0x0077, # LATIN SMALL LETTER W 0x0078: 0x0078, # LATIN SMALL LETTER X 0x0079: 0x0079, # LATIN SMALL LETTER Y 0x007a: 0x007a, # LATIN SMALL LETTER Z 0x007b: 0x007b, # LEFT CURLY BRACKET 0x007c: 0x007c, # VERTICAL LINE 0x007d: 0x007d, # RIGHT CURLY BRACKET 0x007e: 0x007e, # TILDE 0x007f: 0x007f, # DELETE 0x00a0: 0x00ff, # NO-BREAK SPACE 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK 0x00a2: 0x00bd, # CENT SIGN 0x00a3: 0x009c, # POUND SIGN 0x00a4: 0x00cf, # CURRENCY SIGN 0x00a5: 0x00be, # YEN SIGN 0x00a6: 0x00dd, # BROKEN BAR 0x00a7: 0x00f5, # SECTION SIGN 0x00a8: 0x00f9, # DIAERESIS 0x00a9: 0x00b8, # COPYRIGHT SIGN 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00ac: 0x00aa, # NOT SIGN 0x00ad: 0x00f0, # SOFT HYPHEN 0x00ae: 0x00a9, # REGISTERED SIGN 0x00af: 0x00ee, # MACRON 0x00b0: 0x00f8, # DEGREE SIGN 0x00b1: 0x00f1, # PLUS-MINUS SIGN 0x00b2: 0x00fd, # SUPERSCRIPT TWO 0x00b3: 0x00fc, # SUPERSCRIPT THREE 0x00b4: 0x00ef, # ACUTE ACCENT 0x00b5: 0x00e6, # MICRO SIGN 0x00b6: 0x00f4, # PILCROW SIGN 0x00b7: 0x00fa, # MIDDLE DOT 0x00b8: 0x00f7, # CEDILLA 0x00b9: 0x00fb, # SUPERSCRIPT ONE 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF 0x00be: 0x00f3, # VULGAR FRACTION THREE QUARTERS 0x00bf: 0x00a8, # INVERTED QUESTION MARK 0x00c0: 0x00b7, # LATIN CAPITAL LETTER A WITH GRAVE 0x00c1: 0x00b5, # LATIN CAPITAL LETTER A WITH ACUTE 0x00c2: 0x00b6, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX 0x00c3: 0x00c7, # LATIN CAPITAL LETTER A WITH TILDE 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA 0x00c8: 0x00d4, # LATIN CAPITAL LETTER E WITH GRAVE 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE 0x00ca: 0x00d2, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX 0x00cb: 0x00d3, # LATIN CAPITAL LETTER E WITH DIAERESIS 0x00cc: 0x00de, # LATIN CAPITAL LETTER I WITH GRAVE 0x00cd: 0x00d6, # LATIN CAPITAL LETTER I WITH ACUTE 0x00ce: 0x00d7, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX 0x00cf: 0x00d8, # LATIN CAPITAL LETTER I WITH DIAERESIS 0x00d0: 0x00d1, # LATIN CAPITAL LETTER ETH 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE 0x00d2: 0x00e3, # LATIN CAPITAL LETTER O WITH GRAVE 0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE 0x00d4: 0x00e2, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX 0x00d5: 0x00e5, # LATIN CAPITAL LETTER O WITH TILDE 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS 0x00d7: 0x009e, # MULTIPLICATION SIGN 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE 0x00d9: 0x00eb, # LATIN CAPITAL LETTER U WITH GRAVE 0x00da: 0x00e9, # LATIN CAPITAL LETTER U WITH ACUTE 0x00db: 0x00ea, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS 0x00dd: 0x00ed, # LATIN CAPITAL LETTER Y WITH ACUTE 0x00de: 0x00e8, # LATIN CAPITAL LETTER THORN 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX 0x00e3: 0x00c6, # LATIN SMALL LETTER A WITH TILDE 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS 0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS 0x00f0: 0x00d0, # LATIN SMALL LETTER ETH 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX 0x00f5: 0x00e4, # LATIN SMALL LETTER O WITH TILDE 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS 0x00f7: 0x00f6, # DIVISION SIGN 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS 0x00fd: 0x00ec, # LATIN SMALL LETTER Y WITH ACUTE 0x00fe: 0x00e7, # LATIN SMALL LETTER THORN 0x00ff: 0x0098, # LATIN SMALL LETTER Y WITH DIAERESIS 0x0131: 0x00d5, # LATIN SMALL LETTER DOTLESS I 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK 0x2017: 0x00f2, # DOUBLE LOW LINE 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL 0x2580: 0x00df, # UPPER HALF BLOCK 0x2584: 0x00dc, # LOWER HALF BLOCK 0x2588: 0x00db, # FULL BLOCK 0x2591: 0x00b0, # LIGHT SHADE 0x2592: 0x00b1, # MEDIUM SHADE 0x2593: 0x00b2, # DARK SHADE 0x25a0: 0x00fe, # BLACK SQUARE } quopri_codec.py 0000644 00000004221 15053241622 0007571 0 ustar 00 """Codec for quoted-printable encoding. Like base64 and rot13, this returns Python strings, not Unicode. """ import codecs, quopri try: from cStringIO import StringIO except ImportError: from StringIO import StringIO def quopri_encode(input, errors='strict'): """Encode the input, returning a tuple (output object, length consumed). errors defines the error handling to apply. It defaults to 'strict' handling which is the only currently supported error handling for this codec. """ assert errors == 'strict' # using str() because of cStringIO's Unicode undesired Unicode behavior. f = StringIO(str(input)) g = StringIO() quopri.encode(f, g, quotetabs=True) output = g.getvalue() return (output, len(input)) def quopri_decode(input, errors='strict'): """Decode the input, returning a tuple (output object, length consumed). errors defines the error handling to apply. It defaults to 'strict' handling which is the only currently supported error handling for this codec. """ assert errors == 'strict' f = StringIO(str(input)) g = StringIO() quopri.decode(f, g) output = g.getvalue() return (output, len(input)) class Codec(codecs.Codec): def encode(self, input,errors='strict'): return quopri_encode(input,errors) def decode(self, input,errors='strict'): return quopri_decode(input,errors) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return quopri_encode(input, self.errors)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return quopri_decode(input, self.errors)[0] class StreamWriter(Codec, codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass # encodings module API def getregentry(): return codecs.CodecInfo( name='quopri', encode=quopri_encode, decode=quopri_decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamwriter=StreamWriter, streamreader=StreamReader, _is_text_encoding=False, ) iso2022_jp_2.py 0000644 00000002045 15053241622 0007131 0 ustar 00 # # iso2022_jp_2.py: Python Unicode Codec for ISO2022_JP_2 # # Written by Hye-Shik Chang <perky@FreeBSD.org> # import _codecs_iso2022, codecs import _multibytecodec as mbc codec = _codecs_iso2022.getcodec('iso2022_jp_2') class Codec(codecs.Codec): encode = codec.encode decode = codec.decode class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, codecs.IncrementalEncoder): codec = codec class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, codecs.IncrementalDecoder): codec = codec class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): codec = codec class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): codec = codec def getregentry(): return codecs.CodecInfo( name='iso2022_jp_2', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) cp1006.py 0000644 00000033000 15053241622 0006023 0 ustar 00 """ Python Character Mapping Codec cp1006 generated from 'MAPPINGS/VENDORS/MISC/CP1006.TXT' with gencodec.py. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.charmap_decode(input,self.errors,decoding_table)[0] class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='cp1006', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) ### Decoding Table decoding_table = ( u'\x00' # 0x00 -> NULL u'\x01' # 0x01 -> START OF HEADING u'\x02' # 0x02 -> START OF TEXT u'\x03' # 0x03 -> END OF TEXT u'\x04' # 0x04 -> END OF TRANSMISSION u'\x05' # 0x05 -> ENQUIRY u'\x06' # 0x06 -> ACKNOWLEDGE u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION u'\n' # 0x0A -> LINE FEED u'\x0b' # 0x0B -> VERTICAL TABULATION u'\x0c' # 0x0C -> FORM FEED u'\r' # 0x0D -> CARRIAGE RETURN u'\x0e' # 0x0E -> SHIFT OUT u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO u'\x13' # 0x13 -> DEVICE CONTROL THREE u'\x14' # 0x14 -> DEVICE CONTROL FOUR u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE u'\x16' # 0x16 -> SYNCHRONOUS IDLE u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM u'\x1a' # 0x1A -> SUBSTITUTE u'\x1b' # 0x1B -> ESCAPE u'\x1c' # 0x1C -> FILE SEPARATOR u'\x1d' # 0x1D -> GROUP SEPARATOR u'\x1e' # 0x1E -> RECORD SEPARATOR u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK u'#' # 0x23 -> NUMBER SIGN u'$' # 0x24 -> DOLLAR SIGN u'%' # 0x25 -> PERCENT SIGN u'&' # 0x26 -> AMPERSAND u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS u'*' # 0x2A -> ASTERISK u'+' # 0x2B -> PLUS SIGN u',' # 0x2C -> COMMA u'-' # 0x2D -> HYPHEN-MINUS u'.' # 0x2E -> FULL STOP u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO u'3' # 0x33 -> DIGIT THREE u'4' # 0x34 -> DIGIT FOUR u'5' # 0x35 -> DIGIT FIVE u'6' # 0x36 -> DIGIT SIX u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE u':' # 0x3A -> COLON u';' # 0x3B -> SEMICOLON u'<' # 0x3C -> LESS-THAN SIGN u'=' # 0x3D -> EQUALS SIGN u'>' # 0x3E -> GREATER-THAN SIGN u'?' # 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B u'C' # 0x43 -> LATIN CAPITAL LETTER C u'D' # 0x44 -> LATIN CAPITAL LETTER D u'E' # 0x45 -> LATIN CAPITAL LETTER E u'F' # 0x46 -> LATIN CAPITAL LETTER F u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I u'J' # 0x4A -> LATIN CAPITAL LETTER J u'K' # 0x4B -> LATIN CAPITAL LETTER K u'L' # 0x4C -> LATIN CAPITAL LETTER L u'M' # 0x4D -> LATIN CAPITAL LETTER M u'N' # 0x4E -> LATIN CAPITAL LETTER N u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R u'S' # 0x53 -> LATIN CAPITAL LETTER S u'T' # 0x54 -> LATIN CAPITAL LETTER T u'U' # 0x55 -> LATIN CAPITAL LETTER U u'V' # 0x56 -> LATIN CAPITAL LETTER V u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y u'Z' # 0x5A -> LATIN CAPITAL LETTER Z u'[' # 0x5B -> LEFT SQUARE BRACKET u'\\' # 0x5C -> REVERSE SOLIDUS u']' # 0x5D -> RIGHT SQUARE BRACKET u'^' # 0x5E -> CIRCUMFLEX ACCENT u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B u'c' # 0x63 -> LATIN SMALL LETTER C u'd' # 0x64 -> LATIN SMALL LETTER D u'e' # 0x65 -> LATIN SMALL LETTER E u'f' # 0x66 -> LATIN SMALL LETTER F u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I u'j' # 0x6A -> LATIN SMALL LETTER J u'k' # 0x6B -> LATIN SMALL LETTER K u'l' # 0x6C -> LATIN SMALL LETTER L u'm' # 0x6D -> LATIN SMALL LETTER M u'n' # 0x6E -> LATIN SMALL LETTER N u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R u's' # 0x73 -> LATIN SMALL LETTER S u't' # 0x74 -> LATIN SMALL LETTER T u'u' # 0x75 -> LATIN SMALL LETTER U u'v' # 0x76 -> LATIN SMALL LETTER V u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y u'z' # 0x7A -> LATIN SMALL LETTER Z u'{' # 0x7B -> LEFT CURLY BRACKET u'|' # 0x7C -> VERTICAL LINE u'}' # 0x7D -> RIGHT CURLY BRACKET u'~' # 0x7E -> TILDE u'\x7f' # 0x7F -> DELETE u'\x80' # 0x80 -> <control> u'\x81' # 0x81 -> <control> u'\x82' # 0x82 -> <control> u'\x83' # 0x83 -> <control> u'\x84' # 0x84 -> <control> u'\x85' # 0x85 -> <control> u'\x86' # 0x86 -> <control> u'\x87' # 0x87 -> <control> u'\x88' # 0x88 -> <control> u'\x89' # 0x89 -> <control> u'\x8a' # 0x8A -> <control> u'\x8b' # 0x8B -> <control> u'\x8c' # 0x8C -> <control> u'\x8d' # 0x8D -> <control> u'\x8e' # 0x8E -> <control> u'\x8f' # 0x8F -> <control> u'\x90' # 0x90 -> <control> u'\x91' # 0x91 -> <control> u'\x92' # 0x92 -> <control> u'\x93' # 0x93 -> <control> u'\x94' # 0x94 -> <control> u'\x95' # 0x95 -> <control> u'\x96' # 0x96 -> <control> u'\x97' # 0x97 -> <control> u'\x98' # 0x98 -> <control> u'\x99' # 0x99 -> <control> u'\x9a' # 0x9A -> <control> u'\x9b' # 0x9B -> <control> u'\x9c' # 0x9C -> <control> u'\x9d' # 0x9D -> <control> u'\x9e' # 0x9E -> <control> u'\x9f' # 0x9F -> <control> u'\xa0' # 0xA0 -> NO-BREAK SPACE u'\u06f0' # 0xA1 -> EXTENDED ARABIC-INDIC DIGIT ZERO u'\u06f1' # 0xA2 -> EXTENDED ARABIC-INDIC DIGIT ONE u'\u06f2' # 0xA3 -> EXTENDED ARABIC-INDIC DIGIT TWO u'\u06f3' # 0xA4 -> EXTENDED ARABIC-INDIC DIGIT THREE u'\u06f4' # 0xA5 -> EXTENDED ARABIC-INDIC DIGIT FOUR u'\u06f5' # 0xA6 -> EXTENDED ARABIC-INDIC DIGIT FIVE u'\u06f6' # 0xA7 -> EXTENDED ARABIC-INDIC DIGIT SIX u'\u06f7' # 0xA8 -> EXTENDED ARABIC-INDIC DIGIT SEVEN u'\u06f8' # 0xA9 -> EXTENDED ARABIC-INDIC DIGIT EIGHT u'\u06f9' # 0xAA -> EXTENDED ARABIC-INDIC DIGIT NINE u'\u060c' # 0xAB -> ARABIC COMMA u'\u061b' # 0xAC -> ARABIC SEMICOLON u'\xad' # 0xAD -> SOFT HYPHEN u'\u061f' # 0xAE -> ARABIC QUESTION MARK u'\ufe81' # 0xAF -> ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM u'\ufe8d' # 0xB0 -> ARABIC LETTER ALEF ISOLATED FORM u'\ufe8e' # 0xB1 -> ARABIC LETTER ALEF FINAL FORM u'\ufe8e' # 0xB2 -> ARABIC LETTER ALEF FINAL FORM u'\ufe8f' # 0xB3 -> ARABIC LETTER BEH ISOLATED FORM u'\ufe91' # 0xB4 -> ARABIC LETTER BEH INITIAL FORM u'\ufb56' # 0xB5 -> ARABIC LETTER PEH ISOLATED FORM u'\ufb58' # 0xB6 -> ARABIC LETTER PEH INITIAL FORM u'\ufe93' # 0xB7 -> ARABIC LETTER TEH MARBUTA ISOLATED FORM u'\ufe95' # 0xB8 -> ARABIC LETTER TEH ISOLATED FORM u'\ufe97' # 0xB9 -> ARABIC LETTER TEH INITIAL FORM u'\ufb66' # 0xBA -> ARABIC LETTER TTEH ISOLATED FORM u'\ufb68' # 0xBB -> ARABIC LETTER TTEH INITIAL FORM u'\ufe99' # 0xBC -> ARABIC LETTER THEH ISOLATED FORM u'\ufe9b' # 0xBD -> ARABIC LETTER THEH INITIAL FORM u'\ufe9d' # 0xBE -> ARABIC LETTER JEEM ISOLATED FORM u'\ufe9f' # 0xBF -> ARABIC LETTER JEEM INITIAL FORM u'\ufb7a' # 0xC0 -> ARABIC LETTER TCHEH ISOLATED FORM u'\ufb7c' # 0xC1 -> ARABIC LETTER TCHEH INITIAL FORM u'\ufea1' # 0xC2 -> ARABIC LETTER HAH ISOLATED FORM u'\ufea3' # 0xC3 -> ARABIC LETTER HAH INITIAL FORM u'\ufea5' # 0xC4 -> ARABIC LETTER KHAH ISOLATED FORM u'\ufea7' # 0xC5 -> ARABIC LETTER KHAH INITIAL FORM u'\ufea9' # 0xC6 -> ARABIC LETTER DAL ISOLATED FORM u'\ufb84' # 0xC7 -> ARABIC LETTER DAHAL ISOLATED FORMN u'\ufeab' # 0xC8 -> ARABIC LETTER THAL ISOLATED FORM u'\ufead' # 0xC9 -> ARABIC LETTER REH ISOLATED FORM u'\ufb8c' # 0xCA -> ARABIC LETTER RREH ISOLATED FORM u'\ufeaf' # 0xCB -> ARABIC LETTER ZAIN ISOLATED FORM u'\ufb8a' # 0xCC -> ARABIC LETTER JEH ISOLATED FORM u'\ufeb1' # 0xCD -> ARABIC LETTER SEEN ISOLATED FORM u'\ufeb3' # 0xCE -> ARABIC LETTER SEEN INITIAL FORM u'\ufeb5' # 0xCF -> ARABIC LETTER SHEEN ISOLATED FORM u'\ufeb7' # 0xD0 -> ARABIC LETTER SHEEN INITIAL FORM u'\ufeb9' # 0xD1 -> ARABIC LETTER SAD ISOLATED FORM u'\ufebb' # 0xD2 -> ARABIC LETTER SAD INITIAL FORM u'\ufebd' # 0xD3 -> ARABIC LETTER DAD ISOLATED FORM u'\ufebf' # 0xD4 -> ARABIC LETTER DAD INITIAL FORM u'\ufec1' # 0xD5 -> ARABIC LETTER TAH ISOLATED FORM u'\ufec5' # 0xD6 -> ARABIC LETTER ZAH ISOLATED FORM u'\ufec9' # 0xD7 -> ARABIC LETTER AIN ISOLATED FORM u'\ufeca' # 0xD8 -> ARABIC LETTER AIN FINAL FORM u'\ufecb' # 0xD9 -> ARABIC LETTER AIN INITIAL FORM u'\ufecc' # 0xDA -> ARABIC LETTER AIN MEDIAL FORM u'\ufecd' # 0xDB -> ARABIC LETTER GHAIN ISOLATED FORM u'\ufece' # 0xDC -> ARABIC LETTER GHAIN FINAL FORM u'\ufecf' # 0xDD -> ARABIC LETTER GHAIN INITIAL FORM u'\ufed0' # 0xDE -> ARABIC LETTER GHAIN MEDIAL FORM u'\ufed1' # 0xDF -> ARABIC LETTER FEH ISOLATED FORM u'\ufed3' # 0xE0 -> ARABIC LETTER FEH INITIAL FORM u'\ufed5' # 0xE1 -> ARABIC LETTER QAF ISOLATED FORM u'\ufed7' # 0xE2 -> ARABIC LETTER QAF INITIAL FORM u'\ufed9' # 0xE3 -> ARABIC LETTER KAF ISOLATED FORM u'\ufedb' # 0xE4 -> ARABIC LETTER KAF INITIAL FORM u'\ufb92' # 0xE5 -> ARABIC LETTER GAF ISOLATED FORM u'\ufb94' # 0xE6 -> ARABIC LETTER GAF INITIAL FORM u'\ufedd' # 0xE7 -> ARABIC LETTER LAM ISOLATED FORM u'\ufedf' # 0xE8 -> ARABIC LETTER LAM INITIAL FORM u'\ufee0' # 0xE9 -> ARABIC LETTER LAM MEDIAL FORM u'\ufee1' # 0xEA -> ARABIC LETTER MEEM ISOLATED FORM u'\ufee3' # 0xEB -> ARABIC LETTER MEEM INITIAL FORM u'\ufb9e' # 0xEC -> ARABIC LETTER NOON GHUNNA ISOLATED FORM u'\ufee5' # 0xED -> ARABIC LETTER NOON ISOLATED FORM u'\ufee7' # 0xEE -> ARABIC LETTER NOON INITIAL FORM u'\ufe85' # 0xEF -> ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM u'\ufeed' # 0xF0 -> ARABIC LETTER WAW ISOLATED FORM u'\ufba6' # 0xF1 -> ARABIC LETTER HEH GOAL ISOLATED FORM u'\ufba8' # 0xF2 -> ARABIC LETTER HEH GOAL INITIAL FORM u'\ufba9' # 0xF3 -> ARABIC LETTER HEH GOAL MEDIAL FORM u'\ufbaa' # 0xF4 -> ARABIC LETTER HEH DOACHASHMEE ISOLATED FORM u'\ufe80' # 0xF5 -> ARABIC LETTER HAMZA ISOLATED FORM u'\ufe89' # 0xF6 -> ARABIC LETTER YEH WITH HAMZA ABOVE ISOLATED FORM u'\ufe8a' # 0xF7 -> ARABIC LETTER YEH WITH HAMZA ABOVE FINAL FORM u'\ufe8b' # 0xF8 -> ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM u'\ufef1' # 0xF9 -> ARABIC LETTER YEH ISOLATED FORM u'\ufef2' # 0xFA -> ARABIC LETTER YEH FINAL FORM u'\ufef3' # 0xFB -> ARABIC LETTER YEH INITIAL FORM u'\ufbb0' # 0xFC -> ARABIC LETTER YEH BARREE WITH HAMZA ABOVE ISOLATED FORM u'\ufbae' # 0xFD -> ARABIC LETTER YEH BARREE ISOLATED FORM u'\ufe7c' # 0xFE -> ARABIC SHADDA ISOLATED FORM u'\ufe7d' # 0xFF -> ARABIC SHADDA MEDIAL FORM ) ### Encoding table encoding_table=codecs.charmap_build(decoding_table) mac_greek.py 0000644 00000033231 15053241622 0007035 0 ustar 00 """ Python Character Mapping Codec mac_greek generated from 'MAPPINGS/VENDORS/APPLE/GREEK.TXT' with gencodec.py. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.charmap_decode(input,self.errors,decoding_table)[0] class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='mac-greek', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) ### Decoding Table decoding_table = ( u'\x00' # 0x00 -> CONTROL CHARACTER u'\x01' # 0x01 -> CONTROL CHARACTER u'\x02' # 0x02 -> CONTROL CHARACTER u'\x03' # 0x03 -> CONTROL CHARACTER u'\x04' # 0x04 -> CONTROL CHARACTER u'\x05' # 0x05 -> CONTROL CHARACTER u'\x06' # 0x06 -> CONTROL CHARACTER u'\x07' # 0x07 -> CONTROL CHARACTER u'\x08' # 0x08 -> CONTROL CHARACTER u'\t' # 0x09 -> CONTROL CHARACTER u'\n' # 0x0A -> CONTROL CHARACTER u'\x0b' # 0x0B -> CONTROL CHARACTER u'\x0c' # 0x0C -> CONTROL CHARACTER u'\r' # 0x0D -> CONTROL CHARACTER u'\x0e' # 0x0E -> CONTROL CHARACTER u'\x0f' # 0x0F -> CONTROL CHARACTER u'\x10' # 0x10 -> CONTROL CHARACTER u'\x11' # 0x11 -> CONTROL CHARACTER u'\x12' # 0x12 -> CONTROL CHARACTER u'\x13' # 0x13 -> CONTROL CHARACTER u'\x14' # 0x14 -> CONTROL CHARACTER u'\x15' # 0x15 -> CONTROL CHARACTER u'\x16' # 0x16 -> CONTROL CHARACTER u'\x17' # 0x17 -> CONTROL CHARACTER u'\x18' # 0x18 -> CONTROL CHARACTER u'\x19' # 0x19 -> CONTROL CHARACTER u'\x1a' # 0x1A -> CONTROL CHARACTER u'\x1b' # 0x1B -> CONTROL CHARACTER u'\x1c' # 0x1C -> CONTROL CHARACTER u'\x1d' # 0x1D -> CONTROL CHARACTER u'\x1e' # 0x1E -> CONTROL CHARACTER u'\x1f' # 0x1F -> CONTROL CHARACTER u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK u'#' # 0x23 -> NUMBER SIGN u'$' # 0x24 -> DOLLAR SIGN u'%' # 0x25 -> PERCENT SIGN u'&' # 0x26 -> AMPERSAND u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS u'*' # 0x2A -> ASTERISK u'+' # 0x2B -> PLUS SIGN u',' # 0x2C -> COMMA u'-' # 0x2D -> HYPHEN-MINUS u'.' # 0x2E -> FULL STOP u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO u'3' # 0x33 -> DIGIT THREE u'4' # 0x34 -> DIGIT FOUR u'5' # 0x35 -> DIGIT FIVE u'6' # 0x36 -> DIGIT SIX u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE u':' # 0x3A -> COLON u';' # 0x3B -> SEMICOLON u'<' # 0x3C -> LESS-THAN SIGN u'=' # 0x3D -> EQUALS SIGN u'>' # 0x3E -> GREATER-THAN SIGN u'?' # 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B u'C' # 0x43 -> LATIN CAPITAL LETTER C u'D' # 0x44 -> LATIN CAPITAL LETTER D u'E' # 0x45 -> LATIN CAPITAL LETTER E u'F' # 0x46 -> LATIN CAPITAL LETTER F u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I u'J' # 0x4A -> LATIN CAPITAL LETTER J u'K' # 0x4B -> LATIN CAPITAL LETTER K u'L' # 0x4C -> LATIN CAPITAL LETTER L u'M' # 0x4D -> LATIN CAPITAL LETTER M u'N' # 0x4E -> LATIN CAPITAL LETTER N u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R u'S' # 0x53 -> LATIN CAPITAL LETTER S u'T' # 0x54 -> LATIN CAPITAL LETTER T u'U' # 0x55 -> LATIN CAPITAL LETTER U u'V' # 0x56 -> LATIN CAPITAL LETTER V u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y u'Z' # 0x5A -> LATIN CAPITAL LETTER Z u'[' # 0x5B -> LEFT SQUARE BRACKET u'\\' # 0x5C -> REVERSE SOLIDUS u']' # 0x5D -> RIGHT SQUARE BRACKET u'^' # 0x5E -> CIRCUMFLEX ACCENT u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B u'c' # 0x63 -> LATIN SMALL LETTER C u'd' # 0x64 -> LATIN SMALL LETTER D u'e' # 0x65 -> LATIN SMALL LETTER E u'f' # 0x66 -> LATIN SMALL LETTER F u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I u'j' # 0x6A -> LATIN SMALL LETTER J u'k' # 0x6B -> LATIN SMALL LETTER K u'l' # 0x6C -> LATIN SMALL LETTER L u'm' # 0x6D -> LATIN SMALL LETTER M u'n' # 0x6E -> LATIN SMALL LETTER N u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R u's' # 0x73 -> LATIN SMALL LETTER S u't' # 0x74 -> LATIN SMALL LETTER T u'u' # 0x75 -> LATIN SMALL LETTER U u'v' # 0x76 -> LATIN SMALL LETTER V u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y u'z' # 0x7A -> LATIN SMALL LETTER Z u'{' # 0x7B -> LEFT CURLY BRACKET u'|' # 0x7C -> VERTICAL LINE u'}' # 0x7D -> RIGHT CURLY BRACKET u'~' # 0x7E -> TILDE u'\x7f' # 0x7F -> CONTROL CHARACTER u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS u'\xb9' # 0x81 -> SUPERSCRIPT ONE u'\xb2' # 0x82 -> SUPERSCRIPT TWO u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE u'\xb3' # 0x84 -> SUPERSCRIPT THREE u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS u'\u0385' # 0x87 -> GREEK DIALYTIKA TONOS u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS u'\u0384' # 0x8B -> GREEK TONOS u'\xa8' # 0x8C -> DIAERESIS u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS u'\xa3' # 0x92 -> POUND SIGN u'\u2122' # 0x93 -> TRADE MARK SIGN u'\xee' # 0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX u'\xef' # 0x95 -> LATIN SMALL LETTER I WITH DIAERESIS u'\u2022' # 0x96 -> BULLET u'\xbd' # 0x97 -> VULGAR FRACTION ONE HALF u'\u2030' # 0x98 -> PER MILLE SIGN u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS u'\xa6' # 0x9B -> BROKEN BAR u'\u20ac' # 0x9C -> EURO SIGN # before Mac OS 9.2.2, was SOFT HYPHEN u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS u'\u2020' # 0xA0 -> DAGGER u'\u0393' # 0xA1 -> GREEK CAPITAL LETTER GAMMA u'\u0394' # 0xA2 -> GREEK CAPITAL LETTER DELTA u'\u0398' # 0xA3 -> GREEK CAPITAL LETTER THETA u'\u039b' # 0xA4 -> GREEK CAPITAL LETTER LAMDA u'\u039e' # 0xA5 -> GREEK CAPITAL LETTER XI u'\u03a0' # 0xA6 -> GREEK CAPITAL LETTER PI u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S u'\xae' # 0xA8 -> REGISTERED SIGN u'\xa9' # 0xA9 -> COPYRIGHT SIGN u'\u03a3' # 0xAA -> GREEK CAPITAL LETTER SIGMA u'\u03aa' # 0xAB -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA u'\xa7' # 0xAC -> SECTION SIGN u'\u2260' # 0xAD -> NOT EQUAL TO u'\xb0' # 0xAE -> DEGREE SIGN u'\xb7' # 0xAF -> MIDDLE DOT u'\u0391' # 0xB0 -> GREEK CAPITAL LETTER ALPHA u'\xb1' # 0xB1 -> PLUS-MINUS SIGN u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO u'\xa5' # 0xB4 -> YEN SIGN u'\u0392' # 0xB5 -> GREEK CAPITAL LETTER BETA u'\u0395' # 0xB6 -> GREEK CAPITAL LETTER EPSILON u'\u0396' # 0xB7 -> GREEK CAPITAL LETTER ZETA u'\u0397' # 0xB8 -> GREEK CAPITAL LETTER ETA u'\u0399' # 0xB9 -> GREEK CAPITAL LETTER IOTA u'\u039a' # 0xBA -> GREEK CAPITAL LETTER KAPPA u'\u039c' # 0xBB -> GREEK CAPITAL LETTER MU u'\u03a6' # 0xBC -> GREEK CAPITAL LETTER PHI u'\u03ab' # 0xBD -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA u'\u03a8' # 0xBE -> GREEK CAPITAL LETTER PSI u'\u03a9' # 0xBF -> GREEK CAPITAL LETTER OMEGA u'\u03ac' # 0xC0 -> GREEK SMALL LETTER ALPHA WITH TONOS u'\u039d' # 0xC1 -> GREEK CAPITAL LETTER NU u'\xac' # 0xC2 -> NOT SIGN u'\u039f' # 0xC3 -> GREEK CAPITAL LETTER OMICRON u'\u03a1' # 0xC4 -> GREEK CAPITAL LETTER RHO u'\u2248' # 0xC5 -> ALMOST EQUAL TO u'\u03a4' # 0xC6 -> GREEK CAPITAL LETTER TAU u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS u'\xa0' # 0xCA -> NO-BREAK SPACE u'\u03a5' # 0xCB -> GREEK CAPITAL LETTER UPSILON u'\u03a7' # 0xCC -> GREEK CAPITAL LETTER CHI u'\u0386' # 0xCD -> GREEK CAPITAL LETTER ALPHA WITH TONOS u'\u0388' # 0xCE -> GREEK CAPITAL LETTER EPSILON WITH TONOS u'\u0153' # 0xCF -> LATIN SMALL LIGATURE OE u'\u2013' # 0xD0 -> EN DASH u'\u2015' # 0xD1 -> HORIZONTAL BAR u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK u'\xf7' # 0xD6 -> DIVISION SIGN u'\u0389' # 0xD7 -> GREEK CAPITAL LETTER ETA WITH TONOS u'\u038a' # 0xD8 -> GREEK CAPITAL LETTER IOTA WITH TONOS u'\u038c' # 0xD9 -> GREEK CAPITAL LETTER OMICRON WITH TONOS u'\u038e' # 0xDA -> GREEK CAPITAL LETTER UPSILON WITH TONOS u'\u03ad' # 0xDB -> GREEK SMALL LETTER EPSILON WITH TONOS u'\u03ae' # 0xDC -> GREEK SMALL LETTER ETA WITH TONOS u'\u03af' # 0xDD -> GREEK SMALL LETTER IOTA WITH TONOS u'\u03cc' # 0xDE -> GREEK SMALL LETTER OMICRON WITH TONOS u'\u038f' # 0xDF -> GREEK CAPITAL LETTER OMEGA WITH TONOS u'\u03cd' # 0xE0 -> GREEK SMALL LETTER UPSILON WITH TONOS u'\u03b1' # 0xE1 -> GREEK SMALL LETTER ALPHA u'\u03b2' # 0xE2 -> GREEK SMALL LETTER BETA u'\u03c8' # 0xE3 -> GREEK SMALL LETTER PSI u'\u03b4' # 0xE4 -> GREEK SMALL LETTER DELTA u'\u03b5' # 0xE5 -> GREEK SMALL LETTER EPSILON u'\u03c6' # 0xE6 -> GREEK SMALL LETTER PHI u'\u03b3' # 0xE7 -> GREEK SMALL LETTER GAMMA u'\u03b7' # 0xE8 -> GREEK SMALL LETTER ETA u'\u03b9' # 0xE9 -> GREEK SMALL LETTER IOTA u'\u03be' # 0xEA -> GREEK SMALL LETTER XI u'\u03ba' # 0xEB -> GREEK SMALL LETTER KAPPA u'\u03bb' # 0xEC -> GREEK SMALL LETTER LAMDA u'\u03bc' # 0xED -> GREEK SMALL LETTER MU u'\u03bd' # 0xEE -> GREEK SMALL LETTER NU u'\u03bf' # 0xEF -> GREEK SMALL LETTER OMICRON u'\u03c0' # 0xF0 -> GREEK SMALL LETTER PI u'\u03ce' # 0xF1 -> GREEK SMALL LETTER OMEGA WITH TONOS u'\u03c1' # 0xF2 -> GREEK SMALL LETTER RHO u'\u03c3' # 0xF3 -> GREEK SMALL LETTER SIGMA u'\u03c4' # 0xF4 -> GREEK SMALL LETTER TAU u'\u03b8' # 0xF5 -> GREEK SMALL LETTER THETA u'\u03c9' # 0xF6 -> GREEK SMALL LETTER OMEGA u'\u03c2' # 0xF7 -> GREEK SMALL LETTER FINAL SIGMA u'\u03c7' # 0xF8 -> GREEK SMALL LETTER CHI u'\u03c5' # 0xF9 -> GREEK SMALL LETTER UPSILON u'\u03b6' # 0xFA -> GREEK SMALL LETTER ZETA u'\u03ca' # 0xFB -> GREEK SMALL LETTER IOTA WITH DIALYTIKA u'\u03cb' # 0xFC -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA u'\u0390' # 0xFD -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS u'\u03b0' # 0xFE -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS u'\xad' # 0xFF -> SOFT HYPHEN # before Mac OS 9.2.2, was undefined ) ### Encoding table encoding_table=codecs.charmap_build(decoding_table) hp_roman8.py 0000644 00000016337 15053241622 0007023 0 ustar 00 """ Python Character Mapping Codec generated from 'hp_roman8.txt' with gencodec.py. Based on data from ftp://dkuug.dk/i18n/charmaps/HP-ROMAN8 (Keld Simonsen) Original source: LaserJet IIP Printer User's Manual HP part no 33471-90901, Hewlet-Packard, June 1989. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.charmap_encode(input,self.errors,encoding_map)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.charmap_decode(input,self.errors,decoding_map)[0] class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='hp-roman8', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamwriter=StreamWriter, streamreader=StreamReader, ) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x00a1: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE 0x00a2: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX 0x00a3: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE 0x00a4: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX 0x00a5: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS 0x00a6: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX 0x00a7: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS 0x00a8: 0x00b4, # ACUTE ACCENT 0x00a9: 0x02cb, # MODIFIER LETTER GRAVE ACCENT (Mandarin Chinese fourth tone) 0x00aa: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT 0x00ab: 0x00a8, # DIAERESIS 0x00ac: 0x02dc, # SMALL TILDE 0x00ad: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE 0x00ae: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX 0x00af: 0x20a4, # LIRA SIGN 0x00b0: 0x00af, # MACRON 0x00b1: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE 0x00b2: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE 0x00b3: 0x00b0, # DEGREE SIGN 0x00b4: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x00b5: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA 0x00b6: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE 0x00b7: 0x00f1, # LATIN SMALL LETTER N WITH TILDE 0x00b8: 0x00a1, # INVERTED EXCLAMATION MARK 0x00b9: 0x00bf, # INVERTED QUESTION MARK 0x00ba: 0x00a4, # CURRENCY SIGN 0x00bb: 0x00a3, # POUND SIGN 0x00bc: 0x00a5, # YEN SIGN 0x00bd: 0x00a7, # SECTION SIGN 0x00be: 0x0192, # LATIN SMALL LETTER F WITH HOOK 0x00bf: 0x00a2, # CENT SIGN 0x00c0: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX 0x00c1: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX 0x00c2: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX 0x00c3: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX 0x00c4: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE 0x00c5: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE 0x00c6: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE 0x00c7: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE 0x00c8: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE 0x00c9: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE 0x00ca: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE 0x00cb: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE 0x00cc: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS 0x00cd: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS 0x00ce: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS 0x00cf: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x00d0: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE 0x00d1: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX 0x00d2: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE 0x00d3: 0x00c6, # LATIN CAPITAL LETTER AE 0x00d4: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE 0x00d5: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE 0x00d6: 0x00f8, # LATIN SMALL LETTER O WITH STROKE 0x00d7: 0x00e6, # LATIN SMALL LETTER AE 0x00d8: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS 0x00d9: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE 0x00da: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS 0x00db: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS 0x00dc: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE 0x00dd: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS 0x00de: 0x00df, # LATIN SMALL LETTER SHARP S (German) 0x00df: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX 0x00e0: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE 0x00e1: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE 0x00e2: 0x00e3, # LATIN SMALL LETTER A WITH TILDE 0x00e3: 0x00d0, # LATIN CAPITAL LETTER ETH (Icelandic) 0x00e4: 0x00f0, # LATIN SMALL LETTER ETH (Icelandic) 0x00e5: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE 0x00e6: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE 0x00e7: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE 0x00e8: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE 0x00e9: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE 0x00ea: 0x00f5, # LATIN SMALL LETTER O WITH TILDE 0x00eb: 0x0160, # LATIN CAPITAL LETTER S WITH CARON 0x00ec: 0x0161, # LATIN SMALL LETTER S WITH CARON 0x00ed: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE 0x00ee: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS 0x00ef: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS 0x00f0: 0x00de, # LATIN CAPITAL LETTER THORN (Icelandic) 0x00f1: 0x00fe, # LATIN SMALL LETTER THORN (Icelandic) 0x00f2: 0x00b7, # MIDDLE DOT 0x00f3: 0x00b5, # MICRO SIGN 0x00f4: 0x00b6, # PILCROW SIGN 0x00f5: 0x00be, # VULGAR FRACTION THREE QUARTERS 0x00f6: 0x2014, # EM DASH 0x00f7: 0x00bc, # VULGAR FRACTION ONE QUARTER 0x00f8: 0x00bd, # VULGAR FRACTION ONE HALF 0x00f9: 0x00aa, # FEMININE ORDINAL INDICATOR 0x00fa: 0x00ba, # MASCULINE ORDINAL INDICATOR 0x00fb: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00fc: 0x25a0, # BLACK SQUARE 0x00fd: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00fe: 0x00b1, # PLUS-MINUS SIGN 0x00ff: None, }) ### Encoding Map encoding_map = codecs.make_encoding_map(decoding_map) mac_cyrillic.py 0000644 00000032616 15053241622 0007560 0 ustar 00 """ Python Character Mapping Codec mac_cyrillic generated from 'MAPPINGS/VENDORS/APPLE/CYRILLIC.TXT' with gencodec.py. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.charmap_decode(input,self.errors,decoding_table)[0] class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='mac-cyrillic', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) ### Decoding Table decoding_table = ( u'\x00' # 0x00 -> CONTROL CHARACTER u'\x01' # 0x01 -> CONTROL CHARACTER u'\x02' # 0x02 -> CONTROL CHARACTER u'\x03' # 0x03 -> CONTROL CHARACTER u'\x04' # 0x04 -> CONTROL CHARACTER u'\x05' # 0x05 -> CONTROL CHARACTER u'\x06' # 0x06 -> CONTROL CHARACTER u'\x07' # 0x07 -> CONTROL CHARACTER u'\x08' # 0x08 -> CONTROL CHARACTER u'\t' # 0x09 -> CONTROL CHARACTER u'\n' # 0x0A -> CONTROL CHARACTER u'\x0b' # 0x0B -> CONTROL CHARACTER u'\x0c' # 0x0C -> CONTROL CHARACTER u'\r' # 0x0D -> CONTROL CHARACTER u'\x0e' # 0x0E -> CONTROL CHARACTER u'\x0f' # 0x0F -> CONTROL CHARACTER u'\x10' # 0x10 -> CONTROL CHARACTER u'\x11' # 0x11 -> CONTROL CHARACTER u'\x12' # 0x12 -> CONTROL CHARACTER u'\x13' # 0x13 -> CONTROL CHARACTER u'\x14' # 0x14 -> CONTROL CHARACTER u'\x15' # 0x15 -> CONTROL CHARACTER u'\x16' # 0x16 -> CONTROL CHARACTER u'\x17' # 0x17 -> CONTROL CHARACTER u'\x18' # 0x18 -> CONTROL CHARACTER u'\x19' # 0x19 -> CONTROL CHARACTER u'\x1a' # 0x1A -> CONTROL CHARACTER u'\x1b' # 0x1B -> CONTROL CHARACTER u'\x1c' # 0x1C -> CONTROL CHARACTER u'\x1d' # 0x1D -> CONTROL CHARACTER u'\x1e' # 0x1E -> CONTROL CHARACTER u'\x1f' # 0x1F -> CONTROL CHARACTER u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK u'#' # 0x23 -> NUMBER SIGN u'$' # 0x24 -> DOLLAR SIGN u'%' # 0x25 -> PERCENT SIGN u'&' # 0x26 -> AMPERSAND u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS u'*' # 0x2A -> ASTERISK u'+' # 0x2B -> PLUS SIGN u',' # 0x2C -> COMMA u'-' # 0x2D -> HYPHEN-MINUS u'.' # 0x2E -> FULL STOP u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO u'3' # 0x33 -> DIGIT THREE u'4' # 0x34 -> DIGIT FOUR u'5' # 0x35 -> DIGIT FIVE u'6' # 0x36 -> DIGIT SIX u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE u':' # 0x3A -> COLON u';' # 0x3B -> SEMICOLON u'<' # 0x3C -> LESS-THAN SIGN u'=' # 0x3D -> EQUALS SIGN u'>' # 0x3E -> GREATER-THAN SIGN u'?' # 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B u'C' # 0x43 -> LATIN CAPITAL LETTER C u'D' # 0x44 -> LATIN CAPITAL LETTER D u'E' # 0x45 -> LATIN CAPITAL LETTER E u'F' # 0x46 -> LATIN CAPITAL LETTER F u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I u'J' # 0x4A -> LATIN CAPITAL LETTER J u'K' # 0x4B -> LATIN CAPITAL LETTER K u'L' # 0x4C -> LATIN CAPITAL LETTER L u'M' # 0x4D -> LATIN CAPITAL LETTER M u'N' # 0x4E -> LATIN CAPITAL LETTER N u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R u'S' # 0x53 -> LATIN CAPITAL LETTER S u'T' # 0x54 -> LATIN CAPITAL LETTER T u'U' # 0x55 -> LATIN CAPITAL LETTER U u'V' # 0x56 -> LATIN CAPITAL LETTER V u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y u'Z' # 0x5A -> LATIN CAPITAL LETTER Z u'[' # 0x5B -> LEFT SQUARE BRACKET u'\\' # 0x5C -> REVERSE SOLIDUS u']' # 0x5D -> RIGHT SQUARE BRACKET u'^' # 0x5E -> CIRCUMFLEX ACCENT u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B u'c' # 0x63 -> LATIN SMALL LETTER C u'd' # 0x64 -> LATIN SMALL LETTER D u'e' # 0x65 -> LATIN SMALL LETTER E u'f' # 0x66 -> LATIN SMALL LETTER F u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I u'j' # 0x6A -> LATIN SMALL LETTER J u'k' # 0x6B -> LATIN SMALL LETTER K u'l' # 0x6C -> LATIN SMALL LETTER L u'm' # 0x6D -> LATIN SMALL LETTER M u'n' # 0x6E -> LATIN SMALL LETTER N u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R u's' # 0x73 -> LATIN SMALL LETTER S u't' # 0x74 -> LATIN SMALL LETTER T u'u' # 0x75 -> LATIN SMALL LETTER U u'v' # 0x76 -> LATIN SMALL LETTER V u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y u'z' # 0x7A -> LATIN SMALL LETTER Z u'{' # 0x7B -> LEFT CURLY BRACKET u'|' # 0x7C -> VERTICAL LINE u'}' # 0x7D -> RIGHT CURLY BRACKET u'~' # 0x7E -> TILDE u'\x7f' # 0x7F -> CONTROL CHARACTER u'\u0410' # 0x80 -> CYRILLIC CAPITAL LETTER A u'\u0411' # 0x81 -> CYRILLIC CAPITAL LETTER BE u'\u0412' # 0x82 -> CYRILLIC CAPITAL LETTER VE u'\u0413' # 0x83 -> CYRILLIC CAPITAL LETTER GHE u'\u0414' # 0x84 -> CYRILLIC CAPITAL LETTER DE u'\u0415' # 0x85 -> CYRILLIC CAPITAL LETTER IE u'\u0416' # 0x86 -> CYRILLIC CAPITAL LETTER ZHE u'\u0417' # 0x87 -> CYRILLIC CAPITAL LETTER ZE u'\u0418' # 0x88 -> CYRILLIC CAPITAL LETTER I u'\u0419' # 0x89 -> CYRILLIC CAPITAL LETTER SHORT I u'\u041a' # 0x8A -> CYRILLIC CAPITAL LETTER KA u'\u041b' # 0x8B -> CYRILLIC CAPITAL LETTER EL u'\u041c' # 0x8C -> CYRILLIC CAPITAL LETTER EM u'\u041d' # 0x8D -> CYRILLIC CAPITAL LETTER EN u'\u041e' # 0x8E -> CYRILLIC CAPITAL LETTER O u'\u041f' # 0x8F -> CYRILLIC CAPITAL LETTER PE u'\u0420' # 0x90 -> CYRILLIC CAPITAL LETTER ER u'\u0421' # 0x91 -> CYRILLIC CAPITAL LETTER ES u'\u0422' # 0x92 -> CYRILLIC CAPITAL LETTER TE u'\u0423' # 0x93 -> CYRILLIC CAPITAL LETTER U u'\u0424' # 0x94 -> CYRILLIC CAPITAL LETTER EF u'\u0425' # 0x95 -> CYRILLIC CAPITAL LETTER HA u'\u0426' # 0x96 -> CYRILLIC CAPITAL LETTER TSE u'\u0427' # 0x97 -> CYRILLIC CAPITAL LETTER CHE u'\u0428' # 0x98 -> CYRILLIC CAPITAL LETTER SHA u'\u0429' # 0x99 -> CYRILLIC CAPITAL LETTER SHCHA u'\u042a' # 0x9A -> CYRILLIC CAPITAL LETTER HARD SIGN u'\u042b' # 0x9B -> CYRILLIC CAPITAL LETTER YERU u'\u042c' # 0x9C -> CYRILLIC CAPITAL LETTER SOFT SIGN u'\u042d' # 0x9D -> CYRILLIC CAPITAL LETTER E u'\u042e' # 0x9E -> CYRILLIC CAPITAL LETTER YU u'\u042f' # 0x9F -> CYRILLIC CAPITAL LETTER YA u'\u2020' # 0xA0 -> DAGGER u'\xb0' # 0xA1 -> DEGREE SIGN u'\u0490' # 0xA2 -> CYRILLIC CAPITAL LETTER GHE WITH UPTURN u'\xa3' # 0xA3 -> POUND SIGN u'\xa7' # 0xA4 -> SECTION SIGN u'\u2022' # 0xA5 -> BULLET u'\xb6' # 0xA6 -> PILCROW SIGN u'\u0406' # 0xA7 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I u'\xae' # 0xA8 -> REGISTERED SIGN u'\xa9' # 0xA9 -> COPYRIGHT SIGN u'\u2122' # 0xAA -> TRADE MARK SIGN u'\u0402' # 0xAB -> CYRILLIC CAPITAL LETTER DJE u'\u0452' # 0xAC -> CYRILLIC SMALL LETTER DJE u'\u2260' # 0xAD -> NOT EQUAL TO u'\u0403' # 0xAE -> CYRILLIC CAPITAL LETTER GJE u'\u0453' # 0xAF -> CYRILLIC SMALL LETTER GJE u'\u221e' # 0xB0 -> INFINITY u'\xb1' # 0xB1 -> PLUS-MINUS SIGN u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO u'\u0456' # 0xB4 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I u'\xb5' # 0xB5 -> MICRO SIGN u'\u0491' # 0xB6 -> CYRILLIC SMALL LETTER GHE WITH UPTURN u'\u0408' # 0xB7 -> CYRILLIC CAPITAL LETTER JE u'\u0404' # 0xB8 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE u'\u0454' # 0xB9 -> CYRILLIC SMALL LETTER UKRAINIAN IE u'\u0407' # 0xBA -> CYRILLIC CAPITAL LETTER YI u'\u0457' # 0xBB -> CYRILLIC SMALL LETTER YI u'\u0409' # 0xBC -> CYRILLIC CAPITAL LETTER LJE u'\u0459' # 0xBD -> CYRILLIC SMALL LETTER LJE u'\u040a' # 0xBE -> CYRILLIC CAPITAL LETTER NJE u'\u045a' # 0xBF -> CYRILLIC SMALL LETTER NJE u'\u0458' # 0xC0 -> CYRILLIC SMALL LETTER JE u'\u0405' # 0xC1 -> CYRILLIC CAPITAL LETTER DZE u'\xac' # 0xC2 -> NOT SIGN u'\u221a' # 0xC3 -> SQUARE ROOT u'\u0192' # 0xC4 -> LATIN SMALL LETTER F WITH HOOK u'\u2248' # 0xC5 -> ALMOST EQUAL TO u'\u2206' # 0xC6 -> INCREMENT u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS u'\xa0' # 0xCA -> NO-BREAK SPACE u'\u040b' # 0xCB -> CYRILLIC CAPITAL LETTER TSHE u'\u045b' # 0xCC -> CYRILLIC SMALL LETTER TSHE u'\u040c' # 0xCD -> CYRILLIC CAPITAL LETTER KJE u'\u045c' # 0xCE -> CYRILLIC SMALL LETTER KJE u'\u0455' # 0xCF -> CYRILLIC SMALL LETTER DZE u'\u2013' # 0xD0 -> EN DASH u'\u2014' # 0xD1 -> EM DASH u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK u'\xf7' # 0xD6 -> DIVISION SIGN u'\u201e' # 0xD7 -> DOUBLE LOW-9 QUOTATION MARK u'\u040e' # 0xD8 -> CYRILLIC CAPITAL LETTER SHORT U u'\u045e' # 0xD9 -> CYRILLIC SMALL LETTER SHORT U u'\u040f' # 0xDA -> CYRILLIC CAPITAL LETTER DZHE u'\u045f' # 0xDB -> CYRILLIC SMALL LETTER DZHE u'\u2116' # 0xDC -> NUMERO SIGN u'\u0401' # 0xDD -> CYRILLIC CAPITAL LETTER IO u'\u0451' # 0xDE -> CYRILLIC SMALL LETTER IO u'\u044f' # 0xDF -> CYRILLIC SMALL LETTER YA u'\u0430' # 0xE0 -> CYRILLIC SMALL LETTER A u'\u0431' # 0xE1 -> CYRILLIC SMALL LETTER BE u'\u0432' # 0xE2 -> CYRILLIC SMALL LETTER VE u'\u0433' # 0xE3 -> CYRILLIC SMALL LETTER GHE u'\u0434' # 0xE4 -> CYRILLIC SMALL LETTER DE u'\u0435' # 0xE5 -> CYRILLIC SMALL LETTER IE u'\u0436' # 0xE6 -> CYRILLIC SMALL LETTER ZHE u'\u0437' # 0xE7 -> CYRILLIC SMALL LETTER ZE u'\u0438' # 0xE8 -> CYRILLIC SMALL LETTER I u'\u0439' # 0xE9 -> CYRILLIC SMALL LETTER SHORT I u'\u043a' # 0xEA -> CYRILLIC SMALL LETTER KA u'\u043b' # 0xEB -> CYRILLIC SMALL LETTER EL u'\u043c' # 0xEC -> CYRILLIC SMALL LETTER EM u'\u043d' # 0xED -> CYRILLIC SMALL LETTER EN u'\u043e' # 0xEE -> CYRILLIC SMALL LETTER O u'\u043f' # 0xEF -> CYRILLIC SMALL LETTER PE u'\u0440' # 0xF0 -> CYRILLIC SMALL LETTER ER u'\u0441' # 0xF1 -> CYRILLIC SMALL LETTER ES u'\u0442' # 0xF2 -> CYRILLIC SMALL LETTER TE u'\u0443' # 0xF3 -> CYRILLIC SMALL LETTER U u'\u0444' # 0xF4 -> CYRILLIC SMALL LETTER EF u'\u0445' # 0xF5 -> CYRILLIC SMALL LETTER HA u'\u0446' # 0xF6 -> CYRILLIC SMALL LETTER TSE u'\u0447' # 0xF7 -> CYRILLIC SMALL LETTER CHE u'\u0448' # 0xF8 -> CYRILLIC SMALL LETTER SHA u'\u0449' # 0xF9 -> CYRILLIC SMALL LETTER SHCHA u'\u044a' # 0xFA -> CYRILLIC SMALL LETTER HARD SIGN u'\u044b' # 0xFB -> CYRILLIC SMALL LETTER YERU u'\u044c' # 0xFC -> CYRILLIC SMALL LETTER SOFT SIGN u'\u044d' # 0xFD -> CYRILLIC SMALL LETTER E u'\u044e' # 0xFE -> CYRILLIC SMALL LETTER YU u'\u20ac' # 0xFF -> EURO SIGN ) ### Encoding table encoding_table=codecs.charmap_build(decoding_table) mac_turkish.py 0000644 00000032711 15053241622 0007433 0 ustar 00 """ Python Character Mapping Codec mac_turkish generated from 'MAPPINGS/VENDORS/APPLE/TURKISH.TXT' with gencodec.py. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.charmap_decode(input,self.errors,decoding_table)[0] class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='mac-turkish', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) ### Decoding Table decoding_table = ( u'\x00' # 0x00 -> CONTROL CHARACTER u'\x01' # 0x01 -> CONTROL CHARACTER u'\x02' # 0x02 -> CONTROL CHARACTER u'\x03' # 0x03 -> CONTROL CHARACTER u'\x04' # 0x04 -> CONTROL CHARACTER u'\x05' # 0x05 -> CONTROL CHARACTER u'\x06' # 0x06 -> CONTROL CHARACTER u'\x07' # 0x07 -> CONTROL CHARACTER u'\x08' # 0x08 -> CONTROL CHARACTER u'\t' # 0x09 -> CONTROL CHARACTER u'\n' # 0x0A -> CONTROL CHARACTER u'\x0b' # 0x0B -> CONTROL CHARACTER u'\x0c' # 0x0C -> CONTROL CHARACTER u'\r' # 0x0D -> CONTROL CHARACTER u'\x0e' # 0x0E -> CONTROL CHARACTER u'\x0f' # 0x0F -> CONTROL CHARACTER u'\x10' # 0x10 -> CONTROL CHARACTER u'\x11' # 0x11 -> CONTROL CHARACTER u'\x12' # 0x12 -> CONTROL CHARACTER u'\x13' # 0x13 -> CONTROL CHARACTER u'\x14' # 0x14 -> CONTROL CHARACTER u'\x15' # 0x15 -> CONTROL CHARACTER u'\x16' # 0x16 -> CONTROL CHARACTER u'\x17' # 0x17 -> CONTROL CHARACTER u'\x18' # 0x18 -> CONTROL CHARACTER u'\x19' # 0x19 -> CONTROL CHARACTER u'\x1a' # 0x1A -> CONTROL CHARACTER u'\x1b' # 0x1B -> CONTROL CHARACTER u'\x1c' # 0x1C -> CONTROL CHARACTER u'\x1d' # 0x1D -> CONTROL CHARACTER u'\x1e' # 0x1E -> CONTROL CHARACTER u'\x1f' # 0x1F -> CONTROL CHARACTER u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK u'#' # 0x23 -> NUMBER SIGN u'$' # 0x24 -> DOLLAR SIGN u'%' # 0x25 -> PERCENT SIGN u'&' # 0x26 -> AMPERSAND u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS u'*' # 0x2A -> ASTERISK u'+' # 0x2B -> PLUS SIGN u',' # 0x2C -> COMMA u'-' # 0x2D -> HYPHEN-MINUS u'.' # 0x2E -> FULL STOP u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO u'3' # 0x33 -> DIGIT THREE u'4' # 0x34 -> DIGIT FOUR u'5' # 0x35 -> DIGIT FIVE u'6' # 0x36 -> DIGIT SIX u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE u':' # 0x3A -> COLON u';' # 0x3B -> SEMICOLON u'<' # 0x3C -> LESS-THAN SIGN u'=' # 0x3D -> EQUALS SIGN u'>' # 0x3E -> GREATER-THAN SIGN u'?' # 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B u'C' # 0x43 -> LATIN CAPITAL LETTER C u'D' # 0x44 -> LATIN CAPITAL LETTER D u'E' # 0x45 -> LATIN CAPITAL LETTER E u'F' # 0x46 -> LATIN CAPITAL LETTER F u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I u'J' # 0x4A -> LATIN CAPITAL LETTER J u'K' # 0x4B -> LATIN CAPITAL LETTER K u'L' # 0x4C -> LATIN CAPITAL LETTER L u'M' # 0x4D -> LATIN CAPITAL LETTER M u'N' # 0x4E -> LATIN CAPITAL LETTER N u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R u'S' # 0x53 -> LATIN CAPITAL LETTER S u'T' # 0x54 -> LATIN CAPITAL LETTER T u'U' # 0x55 -> LATIN CAPITAL LETTER U u'V' # 0x56 -> LATIN CAPITAL LETTER V u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y u'Z' # 0x5A -> LATIN CAPITAL LETTER Z u'[' # 0x5B -> LEFT SQUARE BRACKET u'\\' # 0x5C -> REVERSE SOLIDUS u']' # 0x5D -> RIGHT SQUARE BRACKET u'^' # 0x5E -> CIRCUMFLEX ACCENT u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B u'c' # 0x63 -> LATIN SMALL LETTER C u'd' # 0x64 -> LATIN SMALL LETTER D u'e' # 0x65 -> LATIN SMALL LETTER E u'f' # 0x66 -> LATIN SMALL LETTER F u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I u'j' # 0x6A -> LATIN SMALL LETTER J u'k' # 0x6B -> LATIN SMALL LETTER K u'l' # 0x6C -> LATIN SMALL LETTER L u'm' # 0x6D -> LATIN SMALL LETTER M u'n' # 0x6E -> LATIN SMALL LETTER N u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R u's' # 0x73 -> LATIN SMALL LETTER S u't' # 0x74 -> LATIN SMALL LETTER T u'u' # 0x75 -> LATIN SMALL LETTER U u'v' # 0x76 -> LATIN SMALL LETTER V u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y u'z' # 0x7A -> LATIN SMALL LETTER Z u'{' # 0x7B -> LEFT CURLY BRACKET u'|' # 0x7C -> VERTICAL LINE u'}' # 0x7D -> RIGHT CURLY BRACKET u'~' # 0x7E -> TILDE u'\x7f' # 0x7F -> CONTROL CHARACTER u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS u'\xc5' # 0x81 -> LATIN CAPITAL LETTER A WITH RING ABOVE u'\xc7' # 0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE u'\xd1' # 0x84 -> LATIN CAPITAL LETTER N WITH TILDE u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS u'\xe3' # 0x8B -> LATIN SMALL LETTER A WITH TILDE u'\xe5' # 0x8C -> LATIN SMALL LETTER A WITH RING ABOVE u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE u'\xec' # 0x93 -> LATIN SMALL LETTER I WITH GRAVE u'\xee' # 0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX u'\xef' # 0x95 -> LATIN SMALL LETTER I WITH DIAERESIS u'\xf1' # 0x96 -> LATIN SMALL LETTER N WITH TILDE u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE u'\xf2' # 0x98 -> LATIN SMALL LETTER O WITH GRAVE u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS u'\xf5' # 0x9B -> LATIN SMALL LETTER O WITH TILDE u'\xfa' # 0x9C -> LATIN SMALL LETTER U WITH ACUTE u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS u'\u2020' # 0xA0 -> DAGGER u'\xb0' # 0xA1 -> DEGREE SIGN u'\xa2' # 0xA2 -> CENT SIGN u'\xa3' # 0xA3 -> POUND SIGN u'\xa7' # 0xA4 -> SECTION SIGN u'\u2022' # 0xA5 -> BULLET u'\xb6' # 0xA6 -> PILCROW SIGN u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S u'\xae' # 0xA8 -> REGISTERED SIGN u'\xa9' # 0xA9 -> COPYRIGHT SIGN u'\u2122' # 0xAA -> TRADE MARK SIGN u'\xb4' # 0xAB -> ACUTE ACCENT u'\xa8' # 0xAC -> DIAERESIS u'\u2260' # 0xAD -> NOT EQUAL TO u'\xc6' # 0xAE -> LATIN CAPITAL LETTER AE u'\xd8' # 0xAF -> LATIN CAPITAL LETTER O WITH STROKE u'\u221e' # 0xB0 -> INFINITY u'\xb1' # 0xB1 -> PLUS-MINUS SIGN u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO u'\xa5' # 0xB4 -> YEN SIGN u'\xb5' # 0xB5 -> MICRO SIGN u'\u2202' # 0xB6 -> PARTIAL DIFFERENTIAL u'\u2211' # 0xB7 -> N-ARY SUMMATION u'\u220f' # 0xB8 -> N-ARY PRODUCT u'\u03c0' # 0xB9 -> GREEK SMALL LETTER PI u'\u222b' # 0xBA -> INTEGRAL u'\xaa' # 0xBB -> FEMININE ORDINAL INDICATOR u'\xba' # 0xBC -> MASCULINE ORDINAL INDICATOR u'\u03a9' # 0xBD -> GREEK CAPITAL LETTER OMEGA u'\xe6' # 0xBE -> LATIN SMALL LETTER AE u'\xf8' # 0xBF -> LATIN SMALL LETTER O WITH STROKE u'\xbf' # 0xC0 -> INVERTED QUESTION MARK u'\xa1' # 0xC1 -> INVERTED EXCLAMATION MARK u'\xac' # 0xC2 -> NOT SIGN u'\u221a' # 0xC3 -> SQUARE ROOT u'\u0192' # 0xC4 -> LATIN SMALL LETTER F WITH HOOK u'\u2248' # 0xC5 -> ALMOST EQUAL TO u'\u2206' # 0xC6 -> INCREMENT u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS u'\xa0' # 0xCA -> NO-BREAK SPACE u'\xc0' # 0xCB -> LATIN CAPITAL LETTER A WITH GRAVE u'\xc3' # 0xCC -> LATIN CAPITAL LETTER A WITH TILDE u'\xd5' # 0xCD -> LATIN CAPITAL LETTER O WITH TILDE u'\u0152' # 0xCE -> LATIN CAPITAL LIGATURE OE u'\u0153' # 0xCF -> LATIN SMALL LIGATURE OE u'\u2013' # 0xD0 -> EN DASH u'\u2014' # 0xD1 -> EM DASH u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK u'\xf7' # 0xD6 -> DIVISION SIGN u'\u25ca' # 0xD7 -> LOZENGE u'\xff' # 0xD8 -> LATIN SMALL LETTER Y WITH DIAERESIS u'\u0178' # 0xD9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS u'\u011e' # 0xDA -> LATIN CAPITAL LETTER G WITH BREVE u'\u011f' # 0xDB -> LATIN SMALL LETTER G WITH BREVE u'\u0130' # 0xDC -> LATIN CAPITAL LETTER I WITH DOT ABOVE u'\u0131' # 0xDD -> LATIN SMALL LETTER DOTLESS I u'\u015e' # 0xDE -> LATIN CAPITAL LETTER S WITH CEDILLA u'\u015f' # 0xDF -> LATIN SMALL LETTER S WITH CEDILLA u'\u2021' # 0xE0 -> DOUBLE DAGGER u'\xb7' # 0xE1 -> MIDDLE DOT u'\u201a' # 0xE2 -> SINGLE LOW-9 QUOTATION MARK u'\u201e' # 0xE3 -> DOUBLE LOW-9 QUOTATION MARK u'\u2030' # 0xE4 -> PER MILLE SIGN u'\xc2' # 0xE5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX u'\xca' # 0xE6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX u'\xc1' # 0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE u'\xcb' # 0xE8 -> LATIN CAPITAL LETTER E WITH DIAERESIS u'\xc8' # 0xE9 -> LATIN CAPITAL LETTER E WITH GRAVE u'\xcd' # 0xEA -> LATIN CAPITAL LETTER I WITH ACUTE u'\xce' # 0xEB -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX u'\xcf' # 0xEC -> LATIN CAPITAL LETTER I WITH DIAERESIS u'\xcc' # 0xED -> LATIN CAPITAL LETTER I WITH GRAVE u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE u'\xd4' # 0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX u'\uf8ff' # 0xF0 -> Apple logo u'\xd2' # 0xF1 -> LATIN CAPITAL LETTER O WITH GRAVE u'\xda' # 0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE u'\xdb' # 0xF3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX u'\xd9' # 0xF4 -> LATIN CAPITAL LETTER U WITH GRAVE u'\uf8a0' # 0xF5 -> undefined1 u'\u02c6' # 0xF6 -> MODIFIER LETTER CIRCUMFLEX ACCENT u'\u02dc' # 0xF7 -> SMALL TILDE u'\xaf' # 0xF8 -> MACRON u'\u02d8' # 0xF9 -> BREVE u'\u02d9' # 0xFA -> DOT ABOVE u'\u02da' # 0xFB -> RING ABOVE u'\xb8' # 0xFC -> CEDILLA u'\u02dd' # 0xFD -> DOUBLE ACUTE ACCENT u'\u02db' # 0xFE -> OGONEK u'\u02c7' # 0xFF -> CARON ) ### Encoding table encoding_table=codecs.charmap_build(decoding_table) mac_latin2.py 0000644 00000020565 15053241622 0007137 0 ustar 00 """ Python Character Mapping Codec generated from 'LATIN2.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.charmap_encode(input,self.errors,encoding_map)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.charmap_decode(input,self.errors,decoding_map)[0] class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='mac-latin2', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS 0x0081: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON 0x0082: 0x0101, # LATIN SMALL LETTER A WITH MACRON 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE 0x0084: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS 0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS 0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE 0x0088: 0x0105, # LATIN SMALL LETTER A WITH OGONEK 0x0089: 0x010c, # LATIN CAPITAL LETTER C WITH CARON 0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS 0x008b: 0x010d, # LATIN SMALL LETTER C WITH CARON 0x008c: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE 0x008d: 0x0107, # LATIN SMALL LETTER C WITH ACUTE 0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE 0x008f: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE 0x0090: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE 0x0091: 0x010e, # LATIN CAPITAL LETTER D WITH CARON 0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE 0x0093: 0x010f, # LATIN SMALL LETTER D WITH CARON 0x0094: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON 0x0095: 0x0113, # LATIN SMALL LETTER E WITH MACRON 0x0096: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE 0x0097: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE 0x0098: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE 0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX 0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS 0x009b: 0x00f5, # LATIN SMALL LETTER O WITH TILDE 0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE 0x009d: 0x011a, # LATIN CAPITAL LETTER E WITH CARON 0x009e: 0x011b, # LATIN SMALL LETTER E WITH CARON 0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x00a0: 0x2020, # DAGGER 0x00a1: 0x00b0, # DEGREE SIGN 0x00a2: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK 0x00a4: 0x00a7, # SECTION SIGN 0x00a5: 0x2022, # BULLET 0x00a6: 0x00b6, # PILCROW SIGN 0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S 0x00a8: 0x00ae, # REGISTERED SIGN 0x00aa: 0x2122, # TRADE MARK SIGN 0x00ab: 0x0119, # LATIN SMALL LETTER E WITH OGONEK 0x00ac: 0x00a8, # DIAERESIS 0x00ad: 0x2260, # NOT EQUAL TO 0x00ae: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA 0x00af: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK 0x00b0: 0x012f, # LATIN SMALL LETTER I WITH OGONEK 0x00b1: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON 0x00b2: 0x2264, # LESS-THAN OR EQUAL TO 0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO 0x00b4: 0x012b, # LATIN SMALL LETTER I WITH MACRON 0x00b5: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA 0x00b6: 0x2202, # PARTIAL DIFFERENTIAL 0x00b7: 0x2211, # N-ARY SUMMATION 0x00b8: 0x0142, # LATIN SMALL LETTER L WITH STROKE 0x00b9: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA 0x00ba: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA 0x00bb: 0x013d, # LATIN CAPITAL LETTER L WITH CARON 0x00bc: 0x013e, # LATIN SMALL LETTER L WITH CARON 0x00bd: 0x0139, # LATIN CAPITAL LETTER L WITH ACUTE 0x00be: 0x013a, # LATIN SMALL LETTER L WITH ACUTE 0x00bf: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA 0x00c0: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA 0x00c1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE 0x00c2: 0x00ac, # NOT SIGN 0x00c3: 0x221a, # SQUARE ROOT 0x00c4: 0x0144, # LATIN SMALL LETTER N WITH ACUTE 0x00c5: 0x0147, # LATIN CAPITAL LETTER N WITH CARON 0x00c6: 0x2206, # INCREMENT 0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00c9: 0x2026, # HORIZONTAL ELLIPSIS 0x00ca: 0x00a0, # NO-BREAK SPACE 0x00cb: 0x0148, # LATIN SMALL LETTER N WITH CARON 0x00cc: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE 0x00cd: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE 0x00ce: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE 0x00cf: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON 0x00d0: 0x2013, # EN DASH 0x00d1: 0x2014, # EM DASH 0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK 0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK 0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK 0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK 0x00d6: 0x00f7, # DIVISION SIGN 0x00d7: 0x25ca, # LOZENGE 0x00d8: 0x014d, # LATIN SMALL LETTER O WITH MACRON 0x00d9: 0x0154, # LATIN CAPITAL LETTER R WITH ACUTE 0x00da: 0x0155, # LATIN SMALL LETTER R WITH ACUTE 0x00db: 0x0158, # LATIN CAPITAL LETTER R WITH CARON 0x00dc: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK 0x00dd: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK 0x00de: 0x0159, # LATIN SMALL LETTER R WITH CARON 0x00df: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA 0x00e0: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA 0x00e1: 0x0160, # LATIN CAPITAL LETTER S WITH CARON 0x00e2: 0x201a, # SINGLE LOW-9 QUOTATION MARK 0x00e3: 0x201e, # DOUBLE LOW-9 QUOTATION MARK 0x00e4: 0x0161, # LATIN SMALL LETTER S WITH CARON 0x00e5: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE 0x00e6: 0x015b, # LATIN SMALL LETTER S WITH ACUTE 0x00e7: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE 0x00e8: 0x0164, # LATIN CAPITAL LETTER T WITH CARON 0x00e9: 0x0165, # LATIN SMALL LETTER T WITH CARON 0x00ea: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE 0x00eb: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON 0x00ec: 0x017e, # LATIN SMALL LETTER Z WITH CARON 0x00ed: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON 0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE 0x00ef: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX 0x00f0: 0x016b, # LATIN SMALL LETTER U WITH MACRON 0x00f1: 0x016e, # LATIN CAPITAL LETTER U WITH RING ABOVE 0x00f2: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE 0x00f3: 0x016f, # LATIN SMALL LETTER U WITH RING ABOVE 0x00f4: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE 0x00f5: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE 0x00f6: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK 0x00f7: 0x0173, # LATIN SMALL LETTER U WITH OGONEK 0x00f8: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE 0x00f9: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE 0x00fa: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA 0x00fb: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE 0x00fc: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE 0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE 0x00fe: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA 0x00ff: 0x02c7, # CARON }) ### Encoding Map encoding_map = codecs.make_encoding_map(decoding_map) ptcp154.py 0000644 00000021366 15053241622 0006326 0 ustar 00 """ Python Character Mapping Codec generated from 'PTCP154.txt' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.charmap_encode(input,self.errors,encoding_map)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.charmap_decode(input,self.errors,decoding_map)[0] class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='ptcp154', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x0496, # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER 0x0081: 0x0492, # CYRILLIC CAPITAL LETTER GHE WITH STROKE 0x0082: 0x04ee, # CYRILLIC CAPITAL LETTER U WITH MACRON 0x0083: 0x0493, # CYRILLIC SMALL LETTER GHE WITH STROKE 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK 0x0085: 0x2026, # HORIZONTAL ELLIPSIS 0x0086: 0x04b6, # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER 0x0087: 0x04ae, # CYRILLIC CAPITAL LETTER STRAIGHT U 0x0088: 0x04b2, # CYRILLIC CAPITAL LETTER HA WITH DESCENDER 0x0089: 0x04af, # CYRILLIC SMALL LETTER STRAIGHT U 0x008a: 0x04a0, # CYRILLIC CAPITAL LETTER BASHKIR KA 0x008b: 0x04e2, # CYRILLIC CAPITAL LETTER I WITH MACRON 0x008c: 0x04a2, # CYRILLIC CAPITAL LETTER EN WITH DESCENDER 0x008d: 0x049a, # CYRILLIC CAPITAL LETTER KA WITH DESCENDER 0x008e: 0x04ba, # CYRILLIC CAPITAL LETTER SHHA 0x008f: 0x04b8, # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE 0x0090: 0x0497, # CYRILLIC SMALL LETTER ZHE WITH DESCENDER 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK 0x0095: 0x2022, # BULLET 0x0096: 0x2013, # EN DASH 0x0097: 0x2014, # EM DASH 0x0098: 0x04b3, # CYRILLIC SMALL LETTER HA WITH DESCENDER 0x0099: 0x04b7, # CYRILLIC SMALL LETTER CHE WITH DESCENDER 0x009a: 0x04a1, # CYRILLIC SMALL LETTER BASHKIR KA 0x009b: 0x04e3, # CYRILLIC SMALL LETTER I WITH MACRON 0x009c: 0x04a3, # CYRILLIC SMALL LETTER EN WITH DESCENDER 0x009d: 0x049b, # CYRILLIC SMALL LETTER KA WITH DESCENDER 0x009e: 0x04bb, # CYRILLIC SMALL LETTER SHHA 0x009f: 0x04b9, # CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE 0x00a1: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U (Byelorussian) 0x00a2: 0x045e, # CYRILLIC SMALL LETTER SHORT U (Byelorussian) 0x00a3: 0x0408, # CYRILLIC CAPITAL LETTER JE 0x00a4: 0x04e8, # CYRILLIC CAPITAL LETTER BARRED O 0x00a5: 0x0498, # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER 0x00a6: 0x04b0, # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE 0x00a8: 0x0401, # CYRILLIC CAPITAL LETTER IO 0x00aa: 0x04d8, # CYRILLIC CAPITAL LETTER SCHWA 0x00ad: 0x04ef, # CYRILLIC SMALL LETTER U WITH MACRON 0x00af: 0x049c, # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE 0x00b1: 0x04b1, # CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE 0x00b2: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I 0x00b3: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I 0x00b4: 0x0499, # CYRILLIC SMALL LETTER ZE WITH DESCENDER 0x00b5: 0x04e9, # CYRILLIC SMALL LETTER BARRED O 0x00b8: 0x0451, # CYRILLIC SMALL LETTER IO 0x00b9: 0x2116, # NUMERO SIGN 0x00ba: 0x04d9, # CYRILLIC SMALL LETTER SCHWA 0x00bc: 0x0458, # CYRILLIC SMALL LETTER JE 0x00bd: 0x04aa, # CYRILLIC CAPITAL LETTER ES WITH DESCENDER 0x00be: 0x04ab, # CYRILLIC SMALL LETTER ES WITH DESCENDER 0x00bf: 0x049d, # CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE 0x00c0: 0x0410, # CYRILLIC CAPITAL LETTER A 0x00c1: 0x0411, # CYRILLIC CAPITAL LETTER BE 0x00c2: 0x0412, # CYRILLIC CAPITAL LETTER VE 0x00c3: 0x0413, # CYRILLIC CAPITAL LETTER GHE 0x00c4: 0x0414, # CYRILLIC CAPITAL LETTER DE 0x00c5: 0x0415, # CYRILLIC CAPITAL LETTER IE 0x00c6: 0x0416, # CYRILLIC CAPITAL LETTER ZHE 0x00c7: 0x0417, # CYRILLIC CAPITAL LETTER ZE 0x00c8: 0x0418, # CYRILLIC CAPITAL LETTER I 0x00c9: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I 0x00ca: 0x041a, # CYRILLIC CAPITAL LETTER KA 0x00cb: 0x041b, # CYRILLIC CAPITAL LETTER EL 0x00cc: 0x041c, # CYRILLIC CAPITAL LETTER EM 0x00cd: 0x041d, # CYRILLIC CAPITAL LETTER EN 0x00ce: 0x041e, # CYRILLIC CAPITAL LETTER O 0x00cf: 0x041f, # CYRILLIC CAPITAL LETTER PE 0x00d0: 0x0420, # CYRILLIC CAPITAL LETTER ER 0x00d1: 0x0421, # CYRILLIC CAPITAL LETTER ES 0x00d2: 0x0422, # CYRILLIC CAPITAL LETTER TE 0x00d3: 0x0423, # CYRILLIC CAPITAL LETTER U 0x00d4: 0x0424, # CYRILLIC CAPITAL LETTER EF 0x00d5: 0x0425, # CYRILLIC CAPITAL LETTER HA 0x00d6: 0x0426, # CYRILLIC CAPITAL LETTER TSE 0x00d7: 0x0427, # CYRILLIC CAPITAL LETTER CHE 0x00d8: 0x0428, # CYRILLIC CAPITAL LETTER SHA 0x00d9: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA 0x00da: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN 0x00db: 0x042b, # CYRILLIC CAPITAL LETTER YERU 0x00dc: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN 0x00dd: 0x042d, # CYRILLIC CAPITAL LETTER E 0x00de: 0x042e, # CYRILLIC CAPITAL LETTER YU 0x00df: 0x042f, # CYRILLIC CAPITAL LETTER YA 0x00e0: 0x0430, # CYRILLIC SMALL LETTER A 0x00e1: 0x0431, # CYRILLIC SMALL LETTER BE 0x00e2: 0x0432, # CYRILLIC SMALL LETTER VE 0x00e3: 0x0433, # CYRILLIC SMALL LETTER GHE 0x00e4: 0x0434, # CYRILLIC SMALL LETTER DE 0x00e5: 0x0435, # CYRILLIC SMALL LETTER IE 0x00e6: 0x0436, # CYRILLIC SMALL LETTER ZHE 0x00e7: 0x0437, # CYRILLIC SMALL LETTER ZE 0x00e8: 0x0438, # CYRILLIC SMALL LETTER I 0x00e9: 0x0439, # CYRILLIC SMALL LETTER SHORT I 0x00ea: 0x043a, # CYRILLIC SMALL LETTER KA 0x00eb: 0x043b, # CYRILLIC SMALL LETTER EL 0x00ec: 0x043c, # CYRILLIC SMALL LETTER EM 0x00ed: 0x043d, # CYRILLIC SMALL LETTER EN 0x00ee: 0x043e, # CYRILLIC SMALL LETTER O 0x00ef: 0x043f, # CYRILLIC SMALL LETTER PE 0x00f0: 0x0440, # CYRILLIC SMALL LETTER ER 0x00f1: 0x0441, # CYRILLIC SMALL LETTER ES 0x00f2: 0x0442, # CYRILLIC SMALL LETTER TE 0x00f3: 0x0443, # CYRILLIC SMALL LETTER U 0x00f4: 0x0444, # CYRILLIC SMALL LETTER EF 0x00f5: 0x0445, # CYRILLIC SMALL LETTER HA 0x00f6: 0x0446, # CYRILLIC SMALL LETTER TSE 0x00f7: 0x0447, # CYRILLIC SMALL LETTER CHE 0x00f8: 0x0448, # CYRILLIC SMALL LETTER SHA 0x00f9: 0x0449, # CYRILLIC SMALL LETTER SHCHA 0x00fa: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN 0x00fb: 0x044b, # CYRILLIC SMALL LETTER YERU 0x00fc: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN 0x00fd: 0x044d, # CYRILLIC SMALL LETTER E 0x00fe: 0x044e, # CYRILLIC SMALL LETTER YU 0x00ff: 0x044f, # CYRILLIC SMALL LETTER YA }) ### Encoding Map encoding_map = codecs.make_encoding_map(decoding_map) latin_1.py 0000644 00000002360 15053241622 0006446 0 ustar 00 """ Python 'latin-1' Codec Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. """ import codecs ### Codec APIs class Codec(codecs.Codec): # Note: Binding these as C functions will result in the class not # converting them to methods. This is intended. encode = codecs.latin_1_encode decode = codecs.latin_1_decode class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.latin_1_encode(input,self.errors)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.latin_1_decode(input,self.errors)[0] class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass class StreamConverter(StreamWriter,StreamReader): encode = codecs.latin_1_decode decode = codecs.latin_1_encode ### encodings module API def getregentry(): return codecs.CodecInfo( name='iso8859-1', encode=Codec.encode, decode=Codec.decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) cp1258.py 0000644 00000032464 15053241622 0006051 0 ustar 00 """ Python Character Mapping Codec cp1258 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1258.TXT' with gencodec.py. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.charmap_decode(input,self.errors,decoding_table)[0] class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='cp1258', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) ### Decoding Table decoding_table = ( u'\x00' # 0x00 -> NULL u'\x01' # 0x01 -> START OF HEADING u'\x02' # 0x02 -> START OF TEXT u'\x03' # 0x03 -> END OF TEXT u'\x04' # 0x04 -> END OF TRANSMISSION u'\x05' # 0x05 -> ENQUIRY u'\x06' # 0x06 -> ACKNOWLEDGE u'\x07' # 0x07 -> BELL u'\x08' # 0x08 -> BACKSPACE u'\t' # 0x09 -> HORIZONTAL TABULATION u'\n' # 0x0A -> LINE FEED u'\x0b' # 0x0B -> VERTICAL TABULATION u'\x0c' # 0x0C -> FORM FEED u'\r' # 0x0D -> CARRIAGE RETURN u'\x0e' # 0x0E -> SHIFT OUT u'\x0f' # 0x0F -> SHIFT IN u'\x10' # 0x10 -> DATA LINK ESCAPE u'\x11' # 0x11 -> DEVICE CONTROL ONE u'\x12' # 0x12 -> DEVICE CONTROL TWO u'\x13' # 0x13 -> DEVICE CONTROL THREE u'\x14' # 0x14 -> DEVICE CONTROL FOUR u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE u'\x16' # 0x16 -> SYNCHRONOUS IDLE u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK u'\x18' # 0x18 -> CANCEL u'\x19' # 0x19 -> END OF MEDIUM u'\x1a' # 0x1A -> SUBSTITUTE u'\x1b' # 0x1B -> ESCAPE u'\x1c' # 0x1C -> FILE SEPARATOR u'\x1d' # 0x1D -> GROUP SEPARATOR u'\x1e' # 0x1E -> RECORD SEPARATOR u'\x1f' # 0x1F -> UNIT SEPARATOR u' ' # 0x20 -> SPACE u'!' # 0x21 -> EXCLAMATION MARK u'"' # 0x22 -> QUOTATION MARK u'#' # 0x23 -> NUMBER SIGN u'$' # 0x24 -> DOLLAR SIGN u'%' # 0x25 -> PERCENT SIGN u'&' # 0x26 -> AMPERSAND u"'" # 0x27 -> APOSTROPHE u'(' # 0x28 -> LEFT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS u'*' # 0x2A -> ASTERISK u'+' # 0x2B -> PLUS SIGN u',' # 0x2C -> COMMA u'-' # 0x2D -> HYPHEN-MINUS u'.' # 0x2E -> FULL STOP u'/' # 0x2F -> SOLIDUS u'0' # 0x30 -> DIGIT ZERO u'1' # 0x31 -> DIGIT ONE u'2' # 0x32 -> DIGIT TWO u'3' # 0x33 -> DIGIT THREE u'4' # 0x34 -> DIGIT FOUR u'5' # 0x35 -> DIGIT FIVE u'6' # 0x36 -> DIGIT SIX u'7' # 0x37 -> DIGIT SEVEN u'8' # 0x38 -> DIGIT EIGHT u'9' # 0x39 -> DIGIT NINE u':' # 0x3A -> COLON u';' # 0x3B -> SEMICOLON u'<' # 0x3C -> LESS-THAN SIGN u'=' # 0x3D -> EQUALS SIGN u'>' # 0x3E -> GREATER-THAN SIGN u'?' # 0x3F -> QUESTION MARK u'@' # 0x40 -> COMMERCIAL AT u'A' # 0x41 -> LATIN CAPITAL LETTER A u'B' # 0x42 -> LATIN CAPITAL LETTER B u'C' # 0x43 -> LATIN CAPITAL LETTER C u'D' # 0x44 -> LATIN CAPITAL LETTER D u'E' # 0x45 -> LATIN CAPITAL LETTER E u'F' # 0x46 -> LATIN CAPITAL LETTER F u'G' # 0x47 -> LATIN CAPITAL LETTER G u'H' # 0x48 -> LATIN CAPITAL LETTER H u'I' # 0x49 -> LATIN CAPITAL LETTER I u'J' # 0x4A -> LATIN CAPITAL LETTER J u'K' # 0x4B -> LATIN CAPITAL LETTER K u'L' # 0x4C -> LATIN CAPITAL LETTER L u'M' # 0x4D -> LATIN CAPITAL LETTER M u'N' # 0x4E -> LATIN CAPITAL LETTER N u'O' # 0x4F -> LATIN CAPITAL LETTER O u'P' # 0x50 -> LATIN CAPITAL LETTER P u'Q' # 0x51 -> LATIN CAPITAL LETTER Q u'R' # 0x52 -> LATIN CAPITAL LETTER R u'S' # 0x53 -> LATIN CAPITAL LETTER S u'T' # 0x54 -> LATIN CAPITAL LETTER T u'U' # 0x55 -> LATIN CAPITAL LETTER U u'V' # 0x56 -> LATIN CAPITAL LETTER V u'W' # 0x57 -> LATIN CAPITAL LETTER W u'X' # 0x58 -> LATIN CAPITAL LETTER X u'Y' # 0x59 -> LATIN CAPITAL LETTER Y u'Z' # 0x5A -> LATIN CAPITAL LETTER Z u'[' # 0x5B -> LEFT SQUARE BRACKET u'\\' # 0x5C -> REVERSE SOLIDUS u']' # 0x5D -> RIGHT SQUARE BRACKET u'^' # 0x5E -> CIRCUMFLEX ACCENT u'_' # 0x5F -> LOW LINE u'`' # 0x60 -> GRAVE ACCENT u'a' # 0x61 -> LATIN SMALL LETTER A u'b' # 0x62 -> LATIN SMALL LETTER B u'c' # 0x63 -> LATIN SMALL LETTER C u'd' # 0x64 -> LATIN SMALL LETTER D u'e' # 0x65 -> LATIN SMALL LETTER E u'f' # 0x66 -> LATIN SMALL LETTER F u'g' # 0x67 -> LATIN SMALL LETTER G u'h' # 0x68 -> LATIN SMALL LETTER H u'i' # 0x69 -> LATIN SMALL LETTER I u'j' # 0x6A -> LATIN SMALL LETTER J u'k' # 0x6B -> LATIN SMALL LETTER K u'l' # 0x6C -> LATIN SMALL LETTER L u'm' # 0x6D -> LATIN SMALL LETTER M u'n' # 0x6E -> LATIN SMALL LETTER N u'o' # 0x6F -> LATIN SMALL LETTER O u'p' # 0x70 -> LATIN SMALL LETTER P u'q' # 0x71 -> LATIN SMALL LETTER Q u'r' # 0x72 -> LATIN SMALL LETTER R u's' # 0x73 -> LATIN SMALL LETTER S u't' # 0x74 -> LATIN SMALL LETTER T u'u' # 0x75 -> LATIN SMALL LETTER U u'v' # 0x76 -> LATIN SMALL LETTER V u'w' # 0x77 -> LATIN SMALL LETTER W u'x' # 0x78 -> LATIN SMALL LETTER X u'y' # 0x79 -> LATIN SMALL LETTER Y u'z' # 0x7A -> LATIN SMALL LETTER Z u'{' # 0x7B -> LEFT CURLY BRACKET u'|' # 0x7C -> VERTICAL LINE u'}' # 0x7D -> RIGHT CURLY BRACKET u'~' # 0x7E -> TILDE u'\x7f' # 0x7F -> DELETE u'\u20ac' # 0x80 -> EURO SIGN u'\ufffe' # 0x81 -> UNDEFINED u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS u'\u2020' # 0x86 -> DAGGER u'\u2021' # 0x87 -> DOUBLE DAGGER u'\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT u'\u2030' # 0x89 -> PER MILLE SIGN u'\ufffe' # 0x8A -> UNDEFINED u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK u'\u0152' # 0x8C -> LATIN CAPITAL LIGATURE OE u'\ufffe' # 0x8D -> UNDEFINED u'\ufffe' # 0x8E -> UNDEFINED u'\ufffe' # 0x8F -> UNDEFINED u'\ufffe' # 0x90 -> UNDEFINED u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK u'\u2022' # 0x95 -> BULLET u'\u2013' # 0x96 -> EN DASH u'\u2014' # 0x97 -> EM DASH u'\u02dc' # 0x98 -> SMALL TILDE u'\u2122' # 0x99 -> TRADE MARK SIGN u'\ufffe' # 0x9A -> UNDEFINED u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK u'\u0153' # 0x9C -> LATIN SMALL LIGATURE OE u'\ufffe' # 0x9D -> UNDEFINED u'\ufffe' # 0x9E -> UNDEFINED u'\u0178' # 0x9F -> LATIN CAPITAL LETTER Y WITH DIAERESIS u'\xa0' # 0xA0 -> NO-BREAK SPACE u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK u'\xa2' # 0xA2 -> CENT SIGN u'\xa3' # 0xA3 -> POUND SIGN u'\xa4' # 0xA4 -> CURRENCY SIGN u'\xa5' # 0xA5 -> YEN SIGN u'\xa6' # 0xA6 -> BROKEN BAR u'\xa7' # 0xA7 -> SECTION SIGN u'\xa8' # 0xA8 -> DIAERESIS u'\xa9' # 0xA9 -> COPYRIGHT SIGN u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK u'\xac' # 0xAC -> NOT SIGN u'\xad' # 0xAD -> SOFT HYPHEN u'\xae' # 0xAE -> REGISTERED SIGN u'\xaf' # 0xAF -> MACRON u'\xb0' # 0xB0 -> DEGREE SIGN u'\xb1' # 0xB1 -> PLUS-MINUS SIGN u'\xb2' # 0xB2 -> SUPERSCRIPT TWO u'\xb3' # 0xB3 -> SUPERSCRIPT THREE u'\xb4' # 0xB4 -> ACUTE ACCENT u'\xb5' # 0xB5 -> MICRO SIGN u'\xb6' # 0xB6 -> PILCROW SIGN u'\xb7' # 0xB7 -> MIDDLE DOT u'\xb8' # 0xB8 -> CEDILLA u'\xb9' # 0xB9 -> SUPERSCRIPT ONE u'\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS u'\xbf' # 0xBF -> INVERTED QUESTION MARK u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX u'\u0102' # 0xC3 -> LATIN CAPITAL LETTER A WITH BREVE u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS u'\u0300' # 0xCC -> COMBINING GRAVE ACCENT u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE u'\u0309' # 0xD2 -> COMBINING HOOK ABOVE u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX u'\u01a0' # 0xD5 -> LATIN CAPITAL LETTER O WITH HORN u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS u'\xd7' # 0xD7 -> MULTIPLICATION SIGN u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS u'\u01af' # 0xDD -> LATIN CAPITAL LETTER U WITH HORN u'\u0303' # 0xDE -> COMBINING TILDE u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX u'\u0103' # 0xE3 -> LATIN SMALL LETTER A WITH BREVE u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS u'\u0301' # 0xEC -> COMBINING ACUTE ACCENT u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE u'\u0323' # 0xF2 -> COMBINING DOT BELOW u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX u'\u01a1' # 0xF5 -> LATIN SMALL LETTER O WITH HORN u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS u'\xf7' # 0xF7 -> DIVISION SIGN u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS u'\u01b0' # 0xFD -> LATIN SMALL LETTER U WITH HORN u'\u20ab' # 0xFE -> DONG SIGN u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS ) ### Encoding table encoding_table=codecs.charmap_build(decoding_table) cp949.py 0000644 00000001777 15053241622 0006002 0 ustar 00 # # cp949.py: Python Unicode Codec for CP949 # # Written by Hye-Shik Chang <perky@FreeBSD.org> # import _codecs_kr, codecs import _multibytecodec as mbc codec = _codecs_kr.getcodec('cp949') class Codec(codecs.Codec): encode = codec.encode decode = codec.decode class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, codecs.IncrementalEncoder): codec = codec class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, codecs.IncrementalDecoder): codec = codec class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): codec = codec class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): codec = codec def getregentry(): return codecs.CodecInfo( name='cp949', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) cp437.py 0000644 00000104004 15053241622 0005755 0 ustar 00 """ Python Character Mapping Codec cp437 generated from 'VENDORS/MICSFT/PC/CP437.TXT' with gencodec.py. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.charmap_encode(input,self.errors,encoding_map)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.charmap_decode(input,self.errors,decoding_table)[0] class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='cp437', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS 0x009b: 0x00a2, # CENT SIGN 0x009c: 0x00a3, # POUND SIGN 0x009d: 0x00a5, # YEN SIGN 0x009e: 0x20a7, # PESETA SIGN 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR 0x00a8: 0x00bf, # INVERTED QUESTION MARK 0x00a9: 0x2310, # REVERSED NOT SIGN 0x00aa: 0x00ac, # NOT SIGN 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00b0: 0x2591, # LIGHT SHADE 0x00b1: 0x2592, # MEDIUM SHADE 0x00b2: 0x2593, # DARK SHADE 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT 0x00db: 0x2588, # FULL BLOCK 0x00dc: 0x2584, # LOWER HALF BLOCK 0x00dd: 0x258c, # LEFT HALF BLOCK 0x00de: 0x2590, # RIGHT HALF BLOCK 0x00df: 0x2580, # UPPER HALF BLOCK 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA 0x00e3: 0x03c0, # GREEK SMALL LETTER PI 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA 0x00e6: 0x00b5, # MICRO SIGN 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA 0x00ec: 0x221e, # INFINITY 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON 0x00ef: 0x2229, # INTERSECTION 0x00f0: 0x2261, # IDENTICAL TO 0x00f1: 0x00b1, # PLUS-MINUS SIGN 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO 0x00f4: 0x2320, # TOP HALF INTEGRAL 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL 0x00f6: 0x00f7, # DIVISION SIGN 0x00f7: 0x2248, # ALMOST EQUAL TO 0x00f8: 0x00b0, # DEGREE SIGN 0x00f9: 0x2219, # BULLET OPERATOR 0x00fa: 0x00b7, # MIDDLE DOT 0x00fb: 0x221a, # SQUARE ROOT 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N 0x00fd: 0x00b2, # SUPERSCRIPT TWO 0x00fe: 0x25a0, # BLACK SQUARE 0x00ff: 0x00a0, # NO-BREAK SPACE }) ### Decoding Table decoding_table = ( u'\x00' # 0x0000 -> NULL u'\x01' # 0x0001 -> START OF HEADING u'\x02' # 0x0002 -> START OF TEXT u'\x03' # 0x0003 -> END OF TEXT u'\x04' # 0x0004 -> END OF TRANSMISSION u'\x05' # 0x0005 -> ENQUIRY u'\x06' # 0x0006 -> ACKNOWLEDGE u'\x07' # 0x0007 -> BELL u'\x08' # 0x0008 -> BACKSPACE u'\t' # 0x0009 -> HORIZONTAL TABULATION u'\n' # 0x000a -> LINE FEED u'\x0b' # 0x000b -> VERTICAL TABULATION u'\x0c' # 0x000c -> FORM FEED u'\r' # 0x000d -> CARRIAGE RETURN u'\x0e' # 0x000e -> SHIFT OUT u'\x0f' # 0x000f -> SHIFT IN u'\x10' # 0x0010 -> DATA LINK ESCAPE u'\x11' # 0x0011 -> DEVICE CONTROL ONE u'\x12' # 0x0012 -> DEVICE CONTROL TWO u'\x13' # 0x0013 -> DEVICE CONTROL THREE u'\x14' # 0x0014 -> DEVICE CONTROL FOUR u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE u'\x16' # 0x0016 -> SYNCHRONOUS IDLE u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK u'\x18' # 0x0018 -> CANCEL u'\x19' # 0x0019 -> END OF MEDIUM u'\x1a' # 0x001a -> SUBSTITUTE u'\x1b' # 0x001b -> ESCAPE u'\x1c' # 0x001c -> FILE SEPARATOR u'\x1d' # 0x001d -> GROUP SEPARATOR u'\x1e' # 0x001e -> RECORD SEPARATOR u'\x1f' # 0x001f -> UNIT SEPARATOR u' ' # 0x0020 -> SPACE u'!' # 0x0021 -> EXCLAMATION MARK u'"' # 0x0022 -> QUOTATION MARK u'#' # 0x0023 -> NUMBER SIGN u'$' # 0x0024 -> DOLLAR SIGN u'%' # 0x0025 -> PERCENT SIGN u'&' # 0x0026 -> AMPERSAND u"'" # 0x0027 -> APOSTROPHE u'(' # 0x0028 -> LEFT PARENTHESIS u')' # 0x0029 -> RIGHT PARENTHESIS u'*' # 0x002a -> ASTERISK u'+' # 0x002b -> PLUS SIGN u',' # 0x002c -> COMMA u'-' # 0x002d -> HYPHEN-MINUS u'.' # 0x002e -> FULL STOP u'/' # 0x002f -> SOLIDUS u'0' # 0x0030 -> DIGIT ZERO u'1' # 0x0031 -> DIGIT ONE u'2' # 0x0032 -> DIGIT TWO u'3' # 0x0033 -> DIGIT THREE u'4' # 0x0034 -> DIGIT FOUR u'5' # 0x0035 -> DIGIT FIVE u'6' # 0x0036 -> DIGIT SIX u'7' # 0x0037 -> DIGIT SEVEN u'8' # 0x0038 -> DIGIT EIGHT u'9' # 0x0039 -> DIGIT NINE u':' # 0x003a -> COLON u';' # 0x003b -> SEMICOLON u'<' # 0x003c -> LESS-THAN SIGN u'=' # 0x003d -> EQUALS SIGN u'>' # 0x003e -> GREATER-THAN SIGN u'?' # 0x003f -> QUESTION MARK u'@' # 0x0040 -> COMMERCIAL AT u'A' # 0x0041 -> LATIN CAPITAL LETTER A u'B' # 0x0042 -> LATIN CAPITAL LETTER B u'C' # 0x0043 -> LATIN CAPITAL LETTER C u'D' # 0x0044 -> LATIN CAPITAL LETTER D u'E' # 0x0045 -> LATIN CAPITAL LETTER E u'F' # 0x0046 -> LATIN CAPITAL LETTER F u'G' # 0x0047 -> LATIN CAPITAL LETTER G u'H' # 0x0048 -> LATIN CAPITAL LETTER H u'I' # 0x0049 -> LATIN CAPITAL LETTER I u'J' # 0x004a -> LATIN CAPITAL LETTER J u'K' # 0x004b -> LATIN CAPITAL LETTER K u'L' # 0x004c -> LATIN CAPITAL LETTER L u'M' # 0x004d -> LATIN CAPITAL LETTER M u'N' # 0x004e -> LATIN CAPITAL LETTER N u'O' # 0x004f -> LATIN CAPITAL LETTER O u'P' # 0x0050 -> LATIN CAPITAL LETTER P u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q u'R' # 0x0052 -> LATIN CAPITAL LETTER R u'S' # 0x0053 -> LATIN CAPITAL LETTER S u'T' # 0x0054 -> LATIN CAPITAL LETTER T u'U' # 0x0055 -> LATIN CAPITAL LETTER U u'V' # 0x0056 -> LATIN CAPITAL LETTER V u'W' # 0x0057 -> LATIN CAPITAL LETTER W u'X' # 0x0058 -> LATIN CAPITAL LETTER X u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y u'Z' # 0x005a -> LATIN CAPITAL LETTER Z u'[' # 0x005b -> LEFT SQUARE BRACKET u'\\' # 0x005c -> REVERSE SOLIDUS u']' # 0x005d -> RIGHT SQUARE BRACKET u'^' # 0x005e -> CIRCUMFLEX ACCENT u'_' # 0x005f -> LOW LINE u'`' # 0x0060 -> GRAVE ACCENT u'a' # 0x0061 -> LATIN SMALL LETTER A u'b' # 0x0062 -> LATIN SMALL LETTER B u'c' # 0x0063 -> LATIN SMALL LETTER C u'd' # 0x0064 -> LATIN SMALL LETTER D u'e' # 0x0065 -> LATIN SMALL LETTER E u'f' # 0x0066 -> LATIN SMALL LETTER F u'g' # 0x0067 -> LATIN SMALL LETTER G u'h' # 0x0068 -> LATIN SMALL LETTER H u'i' # 0x0069 -> LATIN SMALL LETTER I u'j' # 0x006a -> LATIN SMALL LETTER J u'k' # 0x006b -> LATIN SMALL LETTER K u'l' # 0x006c -> LATIN SMALL LETTER L u'm' # 0x006d -> LATIN SMALL LETTER M u'n' # 0x006e -> LATIN SMALL LETTER N u'o' # 0x006f -> LATIN SMALL LETTER O u'p' # 0x0070 -> LATIN SMALL LETTER P u'q' # 0x0071 -> LATIN SMALL LETTER Q u'r' # 0x0072 -> LATIN SMALL LETTER R u's' # 0x0073 -> LATIN SMALL LETTER S u't' # 0x0074 -> LATIN SMALL LETTER T u'u' # 0x0075 -> LATIN SMALL LETTER U u'v' # 0x0076 -> LATIN SMALL LETTER V u'w' # 0x0077 -> LATIN SMALL LETTER W u'x' # 0x0078 -> LATIN SMALL LETTER X u'y' # 0x0079 -> LATIN SMALL LETTER Y u'z' # 0x007a -> LATIN SMALL LETTER Z u'{' # 0x007b -> LEFT CURLY BRACKET u'|' # 0x007c -> VERTICAL LINE u'}' # 0x007d -> RIGHT CURLY BRACKET u'~' # 0x007e -> TILDE u'\x7f' # 0x007f -> DELETE u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE u'\xff' # 0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS u'\xa2' # 0x009b -> CENT SIGN u'\xa3' # 0x009c -> POUND SIGN u'\xa5' # 0x009d -> YEN SIGN u'\u20a7' # 0x009e -> PESETA SIGN u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK u'\u2310' # 0x00a9 -> REVERSED NOT SIGN u'\xac' # 0x00aa -> NOT SIGN u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK u'\u2591' # 0x00b0 -> LIGHT SHADE u'\u2592' # 0x00b1 -> MEDIUM SHADE u'\u2593' # 0x00b2 -> DARK SHADE u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT u'\u2588' # 0x00db -> FULL BLOCK u'\u2584' # 0x00dc -> LOWER HALF BLOCK u'\u258c' # 0x00dd -> LEFT HALF BLOCK u'\u2590' # 0x00de -> RIGHT HALF BLOCK u'\u2580' # 0x00df -> UPPER HALF BLOCK u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA u'\xb5' # 0x00e6 -> MICRO SIGN u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA u'\u221e' # 0x00ec -> INFINITY u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON u'\u2229' # 0x00ef -> INTERSECTION u'\u2261' # 0x00f0 -> IDENTICAL TO u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL u'\xf7' # 0x00f6 -> DIVISION SIGN u'\u2248' # 0x00f7 -> ALMOST EQUAL TO u'\xb0' # 0x00f8 -> DEGREE SIGN u'\u2219' # 0x00f9 -> BULLET OPERATOR u'\xb7' # 0x00fa -> MIDDLE DOT u'\u221a' # 0x00fb -> SQUARE ROOT u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N u'\xb2' # 0x00fd -> SUPERSCRIPT TWO u'\u25a0' # 0x00fe -> BLACK SQUARE u'\xa0' # 0x00ff -> NO-BREAK SPACE ) ### Encoding Map encoding_map = { 0x0000: 0x0000, # NULL 0x0001: 0x0001, # START OF HEADING 0x0002: 0x0002, # START OF TEXT 0x0003: 0x0003, # END OF TEXT 0x0004: 0x0004, # END OF TRANSMISSION 0x0005: 0x0005, # ENQUIRY 0x0006: 0x0006, # ACKNOWLEDGE 0x0007: 0x0007, # BELL 0x0008: 0x0008, # BACKSPACE 0x0009: 0x0009, # HORIZONTAL TABULATION 0x000a: 0x000a, # LINE FEED 0x000b: 0x000b, # VERTICAL TABULATION 0x000c: 0x000c, # FORM FEED 0x000d: 0x000d, # CARRIAGE RETURN 0x000e: 0x000e, # SHIFT OUT 0x000f: 0x000f, # SHIFT IN 0x0010: 0x0010, # DATA LINK ESCAPE 0x0011: 0x0011, # DEVICE CONTROL ONE 0x0012: 0x0012, # DEVICE CONTROL TWO 0x0013: 0x0013, # DEVICE CONTROL THREE 0x0014: 0x0014, # DEVICE CONTROL FOUR 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE 0x0016: 0x0016, # SYNCHRONOUS IDLE 0x0017: 0x0017, # END OF TRANSMISSION BLOCK 0x0018: 0x0018, # CANCEL 0x0019: 0x0019, # END OF MEDIUM 0x001a: 0x001a, # SUBSTITUTE 0x001b: 0x001b, # ESCAPE 0x001c: 0x001c, # FILE SEPARATOR 0x001d: 0x001d, # GROUP SEPARATOR 0x001e: 0x001e, # RECORD SEPARATOR 0x001f: 0x001f, # UNIT SEPARATOR 0x0020: 0x0020, # SPACE 0x0021: 0x0021, # EXCLAMATION MARK 0x0022: 0x0022, # QUOTATION MARK 0x0023: 0x0023, # NUMBER SIGN 0x0024: 0x0024, # DOLLAR SIGN 0x0025: 0x0025, # PERCENT SIGN 0x0026: 0x0026, # AMPERSAND 0x0027: 0x0027, # APOSTROPHE 0x0028: 0x0028, # LEFT PARENTHESIS 0x0029: 0x0029, # RIGHT PARENTHESIS 0x002a: 0x002a, # ASTERISK 0x002b: 0x002b, # PLUS SIGN 0x002c: 0x002c, # COMMA 0x002d: 0x002d, # HYPHEN-MINUS 0x002e: 0x002e, # FULL STOP 0x002f: 0x002f, # SOLIDUS 0x0030: 0x0030, # DIGIT ZERO 0x0031: 0x0031, # DIGIT ONE 0x0032: 0x0032, # DIGIT TWO 0x0033: 0x0033, # DIGIT THREE 0x0034: 0x0034, # DIGIT FOUR 0x0035: 0x0035, # DIGIT FIVE 0x0036: 0x0036, # DIGIT SIX 0x0037: 0x0037, # DIGIT SEVEN 0x0038: 0x0038, # DIGIT EIGHT 0x0039: 0x0039, # DIGIT NINE 0x003a: 0x003a, # COLON 0x003b: 0x003b, # SEMICOLON 0x003c: 0x003c, # LESS-THAN SIGN 0x003d: 0x003d, # EQUALS SIGN 0x003e: 0x003e, # GREATER-THAN SIGN 0x003f: 0x003f, # QUESTION MARK 0x0040: 0x0040, # COMMERCIAL AT 0x0041: 0x0041, # LATIN CAPITAL LETTER A 0x0042: 0x0042, # LATIN CAPITAL LETTER B 0x0043: 0x0043, # LATIN CAPITAL LETTER C 0x0044: 0x0044, # LATIN CAPITAL LETTER D 0x0045: 0x0045, # LATIN CAPITAL LETTER E 0x0046: 0x0046, # LATIN CAPITAL LETTER F 0x0047: 0x0047, # LATIN CAPITAL LETTER G 0x0048: 0x0048, # LATIN CAPITAL LETTER H 0x0049: 0x0049, # LATIN CAPITAL LETTER I 0x004a: 0x004a, # LATIN CAPITAL LETTER J 0x004b: 0x004b, # LATIN CAPITAL LETTER K 0x004c: 0x004c, # LATIN CAPITAL LETTER L 0x004d: 0x004d, # LATIN CAPITAL LETTER M 0x004e: 0x004e, # LATIN CAPITAL LETTER N 0x004f: 0x004f, # LATIN CAPITAL LETTER O 0x0050: 0x0050, # LATIN CAPITAL LETTER P 0x0051: 0x0051, # LATIN CAPITAL LETTER Q 0x0052: 0x0052, # LATIN CAPITAL LETTER R 0x0053: 0x0053, # LATIN CAPITAL LETTER S 0x0054: 0x0054, # LATIN CAPITAL LETTER T 0x0055: 0x0055, # LATIN CAPITAL LETTER U 0x0056: 0x0056, # LATIN CAPITAL LETTER V 0x0057: 0x0057, # LATIN CAPITAL LETTER W 0x0058: 0x0058, # LATIN CAPITAL LETTER X 0x0059: 0x0059, # LATIN CAPITAL LETTER Y 0x005a: 0x005a, # LATIN CAPITAL LETTER Z 0x005b: 0x005b, # LEFT SQUARE BRACKET 0x005c: 0x005c, # REVERSE SOLIDUS 0x005d: 0x005d, # RIGHT SQUARE BRACKET 0x005e: 0x005e, # CIRCUMFLEX ACCENT 0x005f: 0x005f, # LOW LINE 0x0060: 0x0060, # GRAVE ACCENT 0x0061: 0x0061, # LATIN SMALL LETTER A 0x0062: 0x0062, # LATIN SMALL LETTER B 0x0063: 0x0063, # LATIN SMALL LETTER C 0x0064: 0x0064, # LATIN SMALL LETTER D 0x0065: 0x0065, # LATIN SMALL LETTER E 0x0066: 0x0066, # LATIN SMALL LETTER F 0x0067: 0x0067, # LATIN SMALL LETTER G 0x0068: 0x0068, # LATIN SMALL LETTER H 0x0069: 0x0069, # LATIN SMALL LETTER I 0x006a: 0x006a, # LATIN SMALL LETTER J 0x006b: 0x006b, # LATIN SMALL LETTER K 0x006c: 0x006c, # LATIN SMALL LETTER L 0x006d: 0x006d, # LATIN SMALL LETTER M 0x006e: 0x006e, # LATIN SMALL LETTER N 0x006f: 0x006f, # LATIN SMALL LETTER O 0x0070: 0x0070, # LATIN SMALL LETTER P 0x0071: 0x0071, # LATIN SMALL LETTER Q 0x0072: 0x0072, # LATIN SMALL LETTER R 0x0073: 0x0073, # LATIN SMALL LETTER S 0x0074: 0x0074, # LATIN SMALL LETTER T 0x0075: 0x0075, # LATIN SMALL LETTER U 0x0076: 0x0076, # LATIN SMALL LETTER V 0x0077: 0x0077, # LATIN SMALL LETTER W 0x0078: 0x0078, # LATIN SMALL LETTER X 0x0079: 0x0079, # LATIN SMALL LETTER Y 0x007a: 0x007a, # LATIN SMALL LETTER Z 0x007b: 0x007b, # LEFT CURLY BRACKET 0x007c: 0x007c, # VERTICAL LINE 0x007d: 0x007d, # RIGHT CURLY BRACKET 0x007e: 0x007e, # TILDE 0x007f: 0x007f, # DELETE 0x00a0: 0x00ff, # NO-BREAK SPACE 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK 0x00a2: 0x009b, # CENT SIGN 0x00a3: 0x009c, # POUND SIGN 0x00a5: 0x009d, # YEN SIGN 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00ac: 0x00aa, # NOT SIGN 0x00b0: 0x00f8, # DEGREE SIGN 0x00b1: 0x00f1, # PLUS-MINUS SIGN 0x00b2: 0x00fd, # SUPERSCRIPT TWO 0x00b5: 0x00e6, # MICRO SIGN 0x00b7: 0x00fa, # MIDDLE DOT 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF 0x00bf: 0x00a8, # INVERTED QUESTION MARK 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS 0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS 0x00f7: 0x00f6, # DIVISION SIGN 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS 0x00ff: 0x0098, # LATIN SMALL LETTER Y WITH DIAERESIS 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON 0x03c0: 0x00e3, # GREEK SMALL LETTER PI 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N 0x20a7: 0x009e, # PESETA SIGN 0x2219: 0x00f9, # BULLET OPERATOR 0x221a: 0x00fb, # SQUARE ROOT 0x221e: 0x00ec, # INFINITY 0x2229: 0x00ef, # INTERSECTION 0x2248: 0x00f7, # ALMOST EQUAL TO 0x2261: 0x00f0, # IDENTICAL TO 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO 0x2310: 0x00a9, # REVERSED NOT SIGN 0x2320: 0x00f4, # TOP HALF INTEGRAL 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL 0x2580: 0x00df, # UPPER HALF BLOCK 0x2584: 0x00dc, # LOWER HALF BLOCK 0x2588: 0x00db, # FULL BLOCK 0x258c: 0x00dd, # LEFT HALF BLOCK 0x2590: 0x00de, # RIGHT HALF BLOCK 0x2591: 0x00b0, # LIGHT SHADE 0x2592: 0x00b1, # MEDIUM SHADE 0x2593: 0x00b2, # DARK SHADE 0x25a0: 0x00fe, # BLACK SQUARE } base64_codec.py 0000644 00000004503 15053241622 0007341 0 ustar 00 """ Python 'base64_codec' Codec - base64 content transfer encoding Unlike most of the other codecs which target Unicode, this codec will return Python string objects for both encode and decode. Written by Marc-Andre Lemburg (mal@lemburg.com). """ import codecs, base64 ### Codec APIs def base64_encode(input,errors='strict'): """ Encodes the object input and returns a tuple (output object, length consumed). errors defines the error handling to apply. It defaults to 'strict' handling which is the only currently supported error handling for this codec. """ assert errors == 'strict' output = base64.encodestring(input) return (output, len(input)) def base64_decode(input,errors='strict'): """ Decodes the object input and returns a tuple (output object, length consumed). input must be an object which provides the bf_getreadbuf buffer slot. Python strings, buffer objects and memory mapped files are examples of objects providing this slot. errors defines the error handling to apply. It defaults to 'strict' handling which is the only currently supported error handling for this codec. """ assert errors == 'strict' output = base64.decodestring(input) return (output, len(input)) class Codec(codecs.Codec): def encode(self, input,errors='strict'): return base64_encode(input,errors) def decode(self, input,errors='strict'): return base64_decode(input,errors) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): assert self.errors == 'strict' return base64.encodestring(input) class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): assert self.errors == 'strict' return base64.decodestring(input) class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='base64', encode=base64_encode, decode=base64_decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamwriter=StreamWriter, streamreader=StreamReader, _is_text_encoding=False, ) utf_8.py 0000644 00000001755 15053241622 0006153 0 ustar 00 """ Python 'utf-8' Codec Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. """ import codecs ### Codec APIs encode = codecs.utf_8_encode def decode(input, errors='strict'): return codecs.utf_8_decode(input, errors, True) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.utf_8_encode(input, self.errors)[0] class IncrementalDecoder(codecs.BufferedIncrementalDecoder): _buffer_decode = codecs.utf_8_decode class StreamWriter(codecs.StreamWriter): encode = codecs.utf_8_encode class StreamReader(codecs.StreamReader): decode = codecs.utf_8_decode ### encodings module API def getregentry(): return codecs.CodecInfo( name='utf-8', encode=encode, decode=decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) euc_jis_2004.py 0000644 00000002033 15053241622 0007202 0 ustar 00 # # euc_jis_2004.py: Python Unicode Codec for EUC_JIS_2004 # # Written by Hye-Shik Chang <perky@FreeBSD.org> # import _codecs_jp, codecs import _multibytecodec as mbc codec = _codecs_jp.getcodec('euc_jis_2004') class Codec(codecs.Codec): encode = codec.encode decode = codec.decode class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, codecs.IncrementalEncoder): codec = codec class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, codecs.IncrementalDecoder): codec = codec class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): codec = codec class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): codec = codec def getregentry(): return codecs.CodecInfo( name='euc_jis_2004', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) idna.py 0000644 00000021476 15053241622 0006043 0 ustar 00 # This module implements the RFCs 3490 (IDNA) and 3491 (Nameprep) import stringprep, re, codecs from unicodedata import ucd_3_2_0 as unicodedata # IDNA section 3.1 dots = re.compile(u"[\u002E\u3002\uFF0E\uFF61]") # IDNA section 5 ace_prefix = "xn--" uace_prefix = unicode(ace_prefix, "ascii") # This assumes query strings, so AllowUnassigned is true def nameprep(label): # Map newlabel = [] for c in label: if stringprep.in_table_b1(c): # Map to nothing continue newlabel.append(stringprep.map_table_b2(c)) label = u"".join(newlabel) # Normalize label = unicodedata.normalize("NFKC", label) # Prohibit for c in label: if stringprep.in_table_c12(c) or \ stringprep.in_table_c22(c) or \ stringprep.in_table_c3(c) or \ stringprep.in_table_c4(c) or \ stringprep.in_table_c5(c) or \ stringprep.in_table_c6(c) or \ stringprep.in_table_c7(c) or \ stringprep.in_table_c8(c) or \ stringprep.in_table_c9(c): raise UnicodeError("Invalid character %r" % c) # Check bidi RandAL = map(stringprep.in_table_d1, label) if any(RandAL): # There is a RandAL char in the string. Must perform further # tests: # 1) The characters in section 5.8 MUST be prohibited. # This is table C.8, which was already checked # 2) If a string contains any RandALCat character, the string # MUST NOT contain any LCat character. if any(stringprep.in_table_d2(x) for x in label): raise UnicodeError("Violation of BIDI requirement 2") # 3) If a string contains any RandALCat character, a # RandALCat character MUST be the first character of the # string, and a RandALCat character MUST be the last # character of the string. if not RandAL[0] or not RandAL[-1]: raise UnicodeError("Violation of BIDI requirement 3") return label def ToASCII(label): try: # Step 1: try ASCII label = label.encode("ascii") except UnicodeError: pass else: # Skip to step 3: UseSTD3ASCIIRules is false, so # Skip to step 8. if 0 < len(label) < 64: return label raise UnicodeError("label empty or too long") # Step 2: nameprep label = nameprep(label) # Step 3: UseSTD3ASCIIRules is false # Step 4: try ASCII try: label = label.encode("ascii") except UnicodeError: pass else: # Skip to step 8. if 0 < len(label) < 64: return label raise UnicodeError("label empty or too long") # Step 5: Check ACE prefix if label.startswith(uace_prefix): raise UnicodeError("Label starts with ACE prefix") # Step 6: Encode with PUNYCODE label = label.encode("punycode") # Step 7: Prepend ACE prefix label = ace_prefix + label # Step 8: Check size if 0 < len(label) < 64: return label raise UnicodeError("label empty or too long") def ToUnicode(label): if len(label) > 1024: # Protection from https://github.com/python/cpython/issues/98433. # https://datatracker.ietf.org/doc/html/rfc5894#section-6 # doesn't specify a label size limit prior to NAMEPREP. But having # one makes practical sense. # This leaves ample room for nameprep() to remove Nothing characters # per https://www.rfc-editor.org/rfc/rfc3454#section-3.1 while still # preventing us from wasting time decoding a big thing that'll just # hit the actual <= 63 length limit in Step 6. raise UnicodeError("label way too long") # Step 1: Check for ASCII if isinstance(label, str): pure_ascii = True else: try: label = label.encode("ascii") pure_ascii = True except UnicodeError: pure_ascii = False if not pure_ascii: # Step 2: Perform nameprep label = nameprep(label) # It doesn't say this, but apparently, it should be ASCII now try: label = label.encode("ascii") except UnicodeError: raise UnicodeError("Invalid character in IDN label") # Step 3: Check for ACE prefix if not label.startswith(ace_prefix): return unicode(label, "ascii") # Step 4: Remove ACE prefix label1 = label[len(ace_prefix):] # Step 5: Decode using PUNYCODE result = label1.decode("punycode") # Step 6: Apply ToASCII label2 = ToASCII(result) # Step 7: Compare the result of step 6 with the one of step 3 # label2 will already be in lower case. if label.lower() != label2: raise UnicodeError("IDNA does not round-trip", label, label2) # Step 8: return the result of step 5 return result ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): if errors != 'strict': # IDNA is quite clear that implementations must be strict raise UnicodeError("unsupported error handling "+errors) if not input: return "", 0 result = [] labels = dots.split(input) if labels and len(labels[-1])==0: trailing_dot = '.' del labels[-1] else: trailing_dot = '' for label in labels: result.append(ToASCII(label)) # Join with U+002E return ".".join(result)+trailing_dot, len(input) def decode(self,input,errors='strict'): if errors != 'strict': raise UnicodeError("Unsupported error handling "+errors) if not input: return u"", 0 # IDNA allows decoding to operate on Unicode strings, too. if isinstance(input, unicode): labels = dots.split(input) else: # Must be ASCII string input = str(input) unicode(input, "ascii") labels = input.split(".") if labels and len(labels[-1]) == 0: trailing_dot = u'.' del labels[-1] else: trailing_dot = u'' result = [] for label in labels: result.append(ToUnicode(label)) return u".".join(result)+trailing_dot, len(input) class IncrementalEncoder(codecs.BufferedIncrementalEncoder): def _buffer_encode(self, input, errors, final): if errors != 'strict': # IDNA is quite clear that implementations must be strict raise UnicodeError("unsupported error handling "+errors) if not input: return ("", 0) labels = dots.split(input) trailing_dot = u'' if labels: if not labels[-1]: trailing_dot = '.' del labels[-1] elif not final: # Keep potentially unfinished label until the next call del labels[-1] if labels: trailing_dot = '.' result = [] size = 0 for label in labels: result.append(ToASCII(label)) if size: size += 1 size += len(label) # Join with U+002E result = ".".join(result) + trailing_dot size += len(trailing_dot) return (result, size) class IncrementalDecoder(codecs.BufferedIncrementalDecoder): def _buffer_decode(self, input, errors, final): if errors != 'strict': raise UnicodeError("Unsupported error handling "+errors) if not input: return (u"", 0) # IDNA allows decoding to operate on Unicode strings, too. if isinstance(input, unicode): labels = dots.split(input) else: # Must be ASCII string input = str(input) unicode(input, "ascii") labels = input.split(".") trailing_dot = u'' if labels: if not labels[-1]: trailing_dot = u'.' del labels[-1] elif not final: # Keep potentially unfinished label until the next call del labels[-1] if labels: trailing_dot = u'.' result = [] size = 0 for label in labels: result.append(ToUnicode(label)) if size: size += 1 size += len(label) result = u".".join(result) + trailing_dot size += len(trailing_dot) return (result, size) class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='idna', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamwriter=StreamWriter, streamreader=StreamReader, ) cp1125.py 0000644 00000103445 15053241622 0006040 0 ustar 00 """ Python Character Mapping Codec for CP1125 """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.charmap_encode(input,self.errors,encoding_map)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.charmap_decode(input,self.errors,decoding_table)[0] class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='cp1125', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A 0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE 0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE 0x0083: 0x0413, # CYRILLIC CAPITAL LETTER GHE 0x0084: 0x0414, # CYRILLIC CAPITAL LETTER DE 0x0085: 0x0415, # CYRILLIC CAPITAL LETTER IE 0x0086: 0x0416, # CYRILLIC CAPITAL LETTER ZHE 0x0087: 0x0417, # CYRILLIC CAPITAL LETTER ZE 0x0088: 0x0418, # CYRILLIC CAPITAL LETTER I 0x0089: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I 0x008a: 0x041a, # CYRILLIC CAPITAL LETTER KA 0x008b: 0x041b, # CYRILLIC CAPITAL LETTER EL 0x008c: 0x041c, # CYRILLIC CAPITAL LETTER EM 0x008d: 0x041d, # CYRILLIC CAPITAL LETTER EN 0x008e: 0x041e, # CYRILLIC CAPITAL LETTER O 0x008f: 0x041f, # CYRILLIC CAPITAL LETTER PE 0x0090: 0x0420, # CYRILLIC CAPITAL LETTER ER 0x0091: 0x0421, # CYRILLIC CAPITAL LETTER ES 0x0092: 0x0422, # CYRILLIC CAPITAL LETTER TE 0x0093: 0x0423, # CYRILLIC CAPITAL LETTER U 0x0094: 0x0424, # CYRILLIC CAPITAL LETTER EF 0x0095: 0x0425, # CYRILLIC CAPITAL LETTER HA 0x0096: 0x0426, # CYRILLIC CAPITAL LETTER TSE 0x0097: 0x0427, # CYRILLIC CAPITAL LETTER CHE 0x0098: 0x0428, # CYRILLIC CAPITAL LETTER SHA 0x0099: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA 0x009a: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN 0x009b: 0x042b, # CYRILLIC CAPITAL LETTER YERU 0x009c: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN 0x009d: 0x042d, # CYRILLIC CAPITAL LETTER E 0x009e: 0x042e, # CYRILLIC CAPITAL LETTER YU 0x009f: 0x042f, # CYRILLIC CAPITAL LETTER YA 0x00a0: 0x0430, # CYRILLIC SMALL LETTER A 0x00a1: 0x0431, # CYRILLIC SMALL LETTER BE 0x00a2: 0x0432, # CYRILLIC SMALL LETTER VE 0x00a3: 0x0433, # CYRILLIC SMALL LETTER GHE 0x00a4: 0x0434, # CYRILLIC SMALL LETTER DE 0x00a5: 0x0435, # CYRILLIC SMALL LETTER IE 0x00a6: 0x0436, # CYRILLIC SMALL LETTER ZHE 0x00a7: 0x0437, # CYRILLIC SMALL LETTER ZE 0x00a8: 0x0438, # CYRILLIC SMALL LETTER I 0x00a9: 0x0439, # CYRILLIC SMALL LETTER SHORT I 0x00aa: 0x043a, # CYRILLIC SMALL LETTER KA 0x00ab: 0x043b, # CYRILLIC SMALL LETTER EL 0x00ac: 0x043c, # CYRILLIC SMALL LETTER EM 0x00ad: 0x043d, # CYRILLIC SMALL LETTER EN 0x00ae: 0x043e, # CYRILLIC SMALL LETTER O 0x00af: 0x043f, # CYRILLIC SMALL LETTER PE 0x00b0: 0x2591, # LIGHT SHADE 0x00b1: 0x2592, # MEDIUM SHADE 0x00b2: 0x2593, # DARK SHADE 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT 0x00db: 0x2588, # FULL BLOCK 0x00dc: 0x2584, # LOWER HALF BLOCK 0x00dd: 0x258c, # LEFT HALF BLOCK 0x00de: 0x2590, # RIGHT HALF BLOCK 0x00df: 0x2580, # UPPER HALF BLOCK 0x00e0: 0x0440, # CYRILLIC SMALL LETTER ER 0x00e1: 0x0441, # CYRILLIC SMALL LETTER ES 0x00e2: 0x0442, # CYRILLIC SMALL LETTER TE 0x00e3: 0x0443, # CYRILLIC SMALL LETTER U 0x00e4: 0x0444, # CYRILLIC SMALL LETTER EF 0x00e5: 0x0445, # CYRILLIC SMALL LETTER HA 0x00e6: 0x0446, # CYRILLIC SMALL LETTER TSE 0x00e7: 0x0447, # CYRILLIC SMALL LETTER CHE 0x00e8: 0x0448, # CYRILLIC SMALL LETTER SHA 0x00e9: 0x0449, # CYRILLIC SMALL LETTER SHCHA 0x00ea: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN 0x00eb: 0x044b, # CYRILLIC SMALL LETTER YERU 0x00ec: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN 0x00ed: 0x044d, # CYRILLIC SMALL LETTER E 0x00ee: 0x044e, # CYRILLIC SMALL LETTER YU 0x00ef: 0x044f, # CYRILLIC SMALL LETTER YA 0x00f0: 0x0401, # CYRILLIC CAPITAL LETTER IO 0x00f1: 0x0451, # CYRILLIC SMALL LETTER IO 0x00f2: 0x0490, # CYRILLIC CAPITAL LETTER GHE WITH UPTURN 0x00f3: 0x0491, # CYRILLIC SMALL LETTER GHE WITH UPTURN 0x00f4: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE 0x00f5: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE 0x00f6: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I 0x00f7: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I 0x00f8: 0x0407, # CYRILLIC CAPITAL LETTER YI 0x00f9: 0x0457, # CYRILLIC SMALL LETTER YI 0x00fa: 0x00b7, # MIDDLE DOT 0x00fb: 0x221a, # SQUARE ROOT 0x00fc: 0x2116, # NUMERO SIGN 0x00fd: 0x00a4, # CURRENCY SIGN 0x00fe: 0x25a0, # BLACK SQUARE 0x00ff: 0x00a0, # NO-BREAK SPACE }) ### Decoding Table decoding_table = ( '\x00' # 0x0000 -> NULL '\x01' # 0x0001 -> START OF HEADING '\x02' # 0x0002 -> START OF TEXT '\x03' # 0x0003 -> END OF TEXT '\x04' # 0x0004 -> END OF TRANSMISSION '\x05' # 0x0005 -> ENQUIRY '\x06' # 0x0006 -> ACKNOWLEDGE '\x07' # 0x0007 -> BELL '\x08' # 0x0008 -> BACKSPACE '\t' # 0x0009 -> HORIZONTAL TABULATION '\n' # 0x000a -> LINE FEED '\x0b' # 0x000b -> VERTICAL TABULATION '\x0c' # 0x000c -> FORM FEED '\r' # 0x000d -> CARRIAGE RETURN '\x0e' # 0x000e -> SHIFT OUT '\x0f' # 0x000f -> SHIFT IN '\x10' # 0x0010 -> DATA LINK ESCAPE '\x11' # 0x0011 -> DEVICE CONTROL ONE '\x12' # 0x0012 -> DEVICE CONTROL TWO '\x13' # 0x0013 -> DEVICE CONTROL THREE '\x14' # 0x0014 -> DEVICE CONTROL FOUR '\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE '\x16' # 0x0016 -> SYNCHRONOUS IDLE '\x17' # 0x0017 -> END OF TRANSMISSION BLOCK '\x18' # 0x0018 -> CANCEL '\x19' # 0x0019 -> END OF MEDIUM '\x1a' # 0x001a -> SUBSTITUTE '\x1b' # 0x001b -> ESCAPE '\x1c' # 0x001c -> FILE SEPARATOR '\x1d' # 0x001d -> GROUP SEPARATOR '\x1e' # 0x001e -> RECORD SEPARATOR '\x1f' # 0x001f -> UNIT SEPARATOR ' ' # 0x0020 -> SPACE '!' # 0x0021 -> EXCLAMATION MARK '"' # 0x0022 -> QUOTATION MARK '#' # 0x0023 -> NUMBER SIGN '$' # 0x0024 -> DOLLAR SIGN '%' # 0x0025 -> PERCENT SIGN '&' # 0x0026 -> AMPERSAND "'" # 0x0027 -> APOSTROPHE '(' # 0x0028 -> LEFT PARENTHESIS ')' # 0x0029 -> RIGHT PARENTHESIS '*' # 0x002a -> ASTERISK '+' # 0x002b -> PLUS SIGN ',' # 0x002c -> COMMA '-' # 0x002d -> HYPHEN-MINUS '.' # 0x002e -> FULL STOP '/' # 0x002f -> SOLIDUS '0' # 0x0030 -> DIGIT ZERO '1' # 0x0031 -> DIGIT ONE '2' # 0x0032 -> DIGIT TWO '3' # 0x0033 -> DIGIT THREE '4' # 0x0034 -> DIGIT FOUR '5' # 0x0035 -> DIGIT FIVE '6' # 0x0036 -> DIGIT SIX '7' # 0x0037 -> DIGIT SEVEN '8' # 0x0038 -> DIGIT EIGHT '9' # 0x0039 -> DIGIT NINE ':' # 0x003a -> COLON ';' # 0x003b -> SEMICOLON '<' # 0x003c -> LESS-THAN SIGN '=' # 0x003d -> EQUALS SIGN '>' # 0x003e -> GREATER-THAN SIGN '?' # 0x003f -> QUESTION MARK '@' # 0x0040 -> COMMERCIAL AT 'A' # 0x0041 -> LATIN CAPITAL LETTER A 'B' # 0x0042 -> LATIN CAPITAL LETTER B 'C' # 0x0043 -> LATIN CAPITAL LETTER C 'D' # 0x0044 -> LATIN CAPITAL LETTER D 'E' # 0x0045 -> LATIN CAPITAL LETTER E 'F' # 0x0046 -> LATIN CAPITAL LETTER F 'G' # 0x0047 -> LATIN CAPITAL LETTER G 'H' # 0x0048 -> LATIN CAPITAL LETTER H 'I' # 0x0049 -> LATIN CAPITAL LETTER I 'J' # 0x004a -> LATIN CAPITAL LETTER J 'K' # 0x004b -> LATIN CAPITAL LETTER K 'L' # 0x004c -> LATIN CAPITAL LETTER L 'M' # 0x004d -> LATIN CAPITAL LETTER M 'N' # 0x004e -> LATIN CAPITAL LETTER N 'O' # 0x004f -> LATIN CAPITAL LETTER O 'P' # 0x0050 -> LATIN CAPITAL LETTER P 'Q' # 0x0051 -> LATIN CAPITAL LETTER Q 'R' # 0x0052 -> LATIN CAPITAL LETTER R 'S' # 0x0053 -> LATIN CAPITAL LETTER S 'T' # 0x0054 -> LATIN CAPITAL LETTER T 'U' # 0x0055 -> LATIN CAPITAL LETTER U 'V' # 0x0056 -> LATIN CAPITAL LETTER V 'W' # 0x0057 -> LATIN CAPITAL LETTER W 'X' # 0x0058 -> LATIN CAPITAL LETTER X 'Y' # 0x0059 -> LATIN CAPITAL LETTER Y 'Z' # 0x005a -> LATIN CAPITAL LETTER Z '[' # 0x005b -> LEFT SQUARE BRACKET '\\' # 0x005c -> REVERSE SOLIDUS ']' # 0x005d -> RIGHT SQUARE BRACKET '^' # 0x005e -> CIRCUMFLEX ACCENT '_' # 0x005f -> LOW LINE '`' # 0x0060 -> GRAVE ACCENT 'a' # 0x0061 -> LATIN SMALL LETTER A 'b' # 0x0062 -> LATIN SMALL LETTER B 'c' # 0x0063 -> LATIN SMALL LETTER C 'd' # 0x0064 -> LATIN SMALL LETTER D 'e' # 0x0065 -> LATIN SMALL LETTER E 'f' # 0x0066 -> LATIN SMALL LETTER F 'g' # 0x0067 -> LATIN SMALL LETTER G 'h' # 0x0068 -> LATIN SMALL LETTER H 'i' # 0x0069 -> LATIN SMALL LETTER I 'j' # 0x006a -> LATIN SMALL LETTER J 'k' # 0x006b -> LATIN SMALL LETTER K 'l' # 0x006c -> LATIN SMALL LETTER L 'm' # 0x006d -> LATIN SMALL LETTER M 'n' # 0x006e -> LATIN SMALL LETTER N 'o' # 0x006f -> LATIN SMALL LETTER O 'p' # 0x0070 -> LATIN SMALL LETTER P 'q' # 0x0071 -> LATIN SMALL LETTER Q 'r' # 0x0072 -> LATIN SMALL LETTER R 's' # 0x0073 -> LATIN SMALL LETTER S 't' # 0x0074 -> LATIN SMALL LETTER T 'u' # 0x0075 -> LATIN SMALL LETTER U 'v' # 0x0076 -> LATIN SMALL LETTER V 'w' # 0x0077 -> LATIN SMALL LETTER W 'x' # 0x0078 -> LATIN SMALL LETTER X 'y' # 0x0079 -> LATIN SMALL LETTER Y 'z' # 0x007a -> LATIN SMALL LETTER Z '{' # 0x007b -> LEFT CURLY BRACKET '|' # 0x007c -> VERTICAL LINE '}' # 0x007d -> RIGHT CURLY BRACKET '~' # 0x007e -> TILDE '\x7f' # 0x007f -> DELETE '\u0410' # 0x0080 -> CYRILLIC CAPITAL LETTER A '\u0411' # 0x0081 -> CYRILLIC CAPITAL LETTER BE '\u0412' # 0x0082 -> CYRILLIC CAPITAL LETTER VE '\u0413' # 0x0083 -> CYRILLIC CAPITAL LETTER GHE '\u0414' # 0x0084 -> CYRILLIC CAPITAL LETTER DE '\u0415' # 0x0085 -> CYRILLIC CAPITAL LETTER IE '\u0416' # 0x0086 -> CYRILLIC CAPITAL LETTER ZHE '\u0417' # 0x0087 -> CYRILLIC CAPITAL LETTER ZE '\u0418' # 0x0088 -> CYRILLIC CAPITAL LETTER I '\u0419' # 0x0089 -> CYRILLIC CAPITAL LETTER SHORT I '\u041a' # 0x008a -> CYRILLIC CAPITAL LETTER KA '\u041b' # 0x008b -> CYRILLIC CAPITAL LETTER EL '\u041c' # 0x008c -> CYRILLIC CAPITAL LETTER EM '\u041d' # 0x008d -> CYRILLIC CAPITAL LETTER EN '\u041e' # 0x008e -> CYRILLIC CAPITAL LETTER O '\u041f' # 0x008f -> CYRILLIC CAPITAL LETTER PE '\u0420' # 0x0090 -> CYRILLIC CAPITAL LETTER ER '\u0421' # 0x0091 -> CYRILLIC CAPITAL LETTER ES '\u0422' # 0x0092 -> CYRILLIC CAPITAL LETTER TE '\u0423' # 0x0093 -> CYRILLIC CAPITAL LETTER U '\u0424' # 0x0094 -> CYRILLIC CAPITAL LETTER EF '\u0425' # 0x0095 -> CYRILLIC CAPITAL LETTER HA '\u0426' # 0x0096 -> CYRILLIC CAPITAL LETTER TSE '\u0427' # 0x0097 -> CYRILLIC CAPITAL LETTER CHE '\u0428' # 0x0098 -> CYRILLIC CAPITAL LETTER SHA '\u0429' # 0x0099 -> CYRILLIC CAPITAL LETTER SHCHA '\u042a' # 0x009a -> CYRILLIC CAPITAL LETTER HARD SIGN '\u042b' # 0x009b -> CYRILLIC CAPITAL LETTER YERU '\u042c' # 0x009c -> CYRILLIC CAPITAL LETTER SOFT SIGN '\u042d' # 0x009d -> CYRILLIC CAPITAL LETTER E '\u042e' # 0x009e -> CYRILLIC CAPITAL LETTER YU '\u042f' # 0x009f -> CYRILLIC CAPITAL LETTER YA '\u0430' # 0x00a0 -> CYRILLIC SMALL LETTER A '\u0431' # 0x00a1 -> CYRILLIC SMALL LETTER BE '\u0432' # 0x00a2 -> CYRILLIC SMALL LETTER VE '\u0433' # 0x00a3 -> CYRILLIC SMALL LETTER GHE '\u0434' # 0x00a4 -> CYRILLIC SMALL LETTER DE '\u0435' # 0x00a5 -> CYRILLIC SMALL LETTER IE '\u0436' # 0x00a6 -> CYRILLIC SMALL LETTER ZHE '\u0437' # 0x00a7 -> CYRILLIC SMALL LETTER ZE '\u0438' # 0x00a8 -> CYRILLIC SMALL LETTER I '\u0439' # 0x00a9 -> CYRILLIC SMALL LETTER SHORT I '\u043a' # 0x00aa -> CYRILLIC SMALL LETTER KA '\u043b' # 0x00ab -> CYRILLIC SMALL LETTER EL '\u043c' # 0x00ac -> CYRILLIC SMALL LETTER EM '\u043d' # 0x00ad -> CYRILLIC SMALL LETTER EN '\u043e' # 0x00ae -> CYRILLIC SMALL LETTER O '\u043f' # 0x00af -> CYRILLIC SMALL LETTER PE '\u2591' # 0x00b0 -> LIGHT SHADE '\u2592' # 0x00b1 -> MEDIUM SHADE '\u2593' # 0x00b2 -> DARK SHADE '\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL '\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT '\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE '\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE '\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE '\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE '\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT '\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL '\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT '\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT '\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE '\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE '\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT '\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT '\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL '\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL '\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT '\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL '\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL '\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE '\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE '\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT '\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT '\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL '\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL '\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT '\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL '\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL '\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE '\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE '\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE '\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE '\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE '\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE '\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE '\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE '\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE '\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE '\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT '\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT '\u2588' # 0x00db -> FULL BLOCK '\u2584' # 0x00dc -> LOWER HALF BLOCK '\u258c' # 0x00dd -> LEFT HALF BLOCK '\u2590' # 0x00de -> RIGHT HALF BLOCK '\u2580' # 0x00df -> UPPER HALF BLOCK '\u0440' # 0x00e0 -> CYRILLIC SMALL LETTER ER '\u0441' # 0x00e1 -> CYRILLIC SMALL LETTER ES '\u0442' # 0x00e2 -> CYRILLIC SMALL LETTER TE '\u0443' # 0x00e3 -> CYRILLIC SMALL LETTER U '\u0444' # 0x00e4 -> CYRILLIC SMALL LETTER EF '\u0445' # 0x00e5 -> CYRILLIC SMALL LETTER HA '\u0446' # 0x00e6 -> CYRILLIC SMALL LETTER TSE '\u0447' # 0x00e7 -> CYRILLIC SMALL LETTER CHE '\u0448' # 0x00e8 -> CYRILLIC SMALL LETTER SHA '\u0449' # 0x00e9 -> CYRILLIC SMALL LETTER SHCHA '\u044a' # 0x00ea -> CYRILLIC SMALL LETTER HARD SIGN '\u044b' # 0x00eb -> CYRILLIC SMALL LETTER YERU '\u044c' # 0x00ec -> CYRILLIC SMALL LETTER SOFT SIGN '\u044d' # 0x00ed -> CYRILLIC SMALL LETTER E '\u044e' # 0x00ee -> CYRILLIC SMALL LETTER YU '\u044f' # 0x00ef -> CYRILLIC SMALL LETTER YA '\u0401' # 0x00f0 -> CYRILLIC CAPITAL LETTER IO '\u0451' # 0x00f1 -> CYRILLIC SMALL LETTER IO '\u0490' # 0x00f2 -> CYRILLIC CAPITAL LETTER GHE WITH UPTURN '\u0491' # 0x00f3 -> CYRILLIC SMALL LETTER GHE WITH UPTURN '\u0404' # 0x00f4 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE '\u0454' # 0x00f5 -> CYRILLIC SMALL LETTER UKRAINIAN IE '\u0406' # 0x00f6 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I '\u0456' # 0x00f7 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I '\u0407' # 0x00f8 -> CYRILLIC CAPITAL LETTER YI '\u0457' # 0x00f9 -> CYRILLIC SMALL LETTER YI '\xb7' # 0x00fa -> MIDDLE DOT '\u221a' # 0x00fb -> SQUARE ROOT '\u2116' # 0x00fc -> NUMERO SIGN '\xa4' # 0x00fd -> CURRENCY SIGN '\u25a0' # 0x00fe -> BLACK SQUARE '\xa0' # 0x00ff -> NO-BREAK SPACE ) ### Encoding Map encoding_map = { 0x0000: 0x0000, # NULL 0x0001: 0x0001, # START OF HEADING 0x0002: 0x0002, # START OF TEXT 0x0003: 0x0003, # END OF TEXT 0x0004: 0x0004, # END OF TRANSMISSION 0x0005: 0x0005, # ENQUIRY 0x0006: 0x0006, # ACKNOWLEDGE 0x0007: 0x0007, # BELL 0x0008: 0x0008, # BACKSPACE 0x0009: 0x0009, # HORIZONTAL TABULATION 0x000a: 0x000a, # LINE FEED 0x000b: 0x000b, # VERTICAL TABULATION 0x000c: 0x000c, # FORM FEED 0x000d: 0x000d, # CARRIAGE RETURN 0x000e: 0x000e, # SHIFT OUT 0x000f: 0x000f, # SHIFT IN 0x0010: 0x0010, # DATA LINK ESCAPE 0x0011: 0x0011, # DEVICE CONTROL ONE 0x0012: 0x0012, # DEVICE CONTROL TWO 0x0013: 0x0013, # DEVICE CONTROL THREE 0x0014: 0x0014, # DEVICE CONTROL FOUR 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE 0x0016: 0x0016, # SYNCHRONOUS IDLE 0x0017: 0x0017, # END OF TRANSMISSION BLOCK 0x0018: 0x0018, # CANCEL 0x0019: 0x0019, # END OF MEDIUM 0x001a: 0x001a, # SUBSTITUTE 0x001b: 0x001b, # ESCAPE 0x001c: 0x001c, # FILE SEPARATOR 0x001d: 0x001d, # GROUP SEPARATOR 0x001e: 0x001e, # RECORD SEPARATOR 0x001f: 0x001f, # UNIT SEPARATOR 0x0020: 0x0020, # SPACE 0x0021: 0x0021, # EXCLAMATION MARK 0x0022: 0x0022, # QUOTATION MARK 0x0023: 0x0023, # NUMBER SIGN 0x0024: 0x0024, # DOLLAR SIGN 0x0025: 0x0025, # PERCENT SIGN 0x0026: 0x0026, # AMPERSAND 0x0027: 0x0027, # APOSTROPHE 0x0028: 0x0028, # LEFT PARENTHESIS 0x0029: 0x0029, # RIGHT PARENTHESIS 0x002a: 0x002a, # ASTERISK 0x002b: 0x002b, # PLUS SIGN 0x002c: 0x002c, # COMMA 0x002d: 0x002d, # HYPHEN-MINUS 0x002e: 0x002e, # FULL STOP 0x002f: 0x002f, # SOLIDUS 0x0030: 0x0030, # DIGIT ZERO 0x0031: 0x0031, # DIGIT ONE 0x0032: 0x0032, # DIGIT TWO 0x0033: 0x0033, # DIGIT THREE 0x0034: 0x0034, # DIGIT FOUR 0x0035: 0x0035, # DIGIT FIVE 0x0036: 0x0036, # DIGIT SIX 0x0037: 0x0037, # DIGIT SEVEN 0x0038: 0x0038, # DIGIT EIGHT 0x0039: 0x0039, # DIGIT NINE 0x003a: 0x003a, # COLON 0x003b: 0x003b, # SEMICOLON 0x003c: 0x003c, # LESS-THAN SIGN 0x003d: 0x003d, # EQUALS SIGN 0x003e: 0x003e, # GREATER-THAN SIGN 0x003f: 0x003f, # QUESTION MARK 0x0040: 0x0040, # COMMERCIAL AT 0x0041: 0x0041, # LATIN CAPITAL LETTER A 0x0042: 0x0042, # LATIN CAPITAL LETTER B 0x0043: 0x0043, # LATIN CAPITAL LETTER C 0x0044: 0x0044, # LATIN CAPITAL LETTER D 0x0045: 0x0045, # LATIN CAPITAL LETTER E 0x0046: 0x0046, # LATIN CAPITAL LETTER F 0x0047: 0x0047, # LATIN CAPITAL LETTER G 0x0048: 0x0048, # LATIN CAPITAL LETTER H 0x0049: 0x0049, # LATIN CAPITAL LETTER I 0x004a: 0x004a, # LATIN CAPITAL LETTER J 0x004b: 0x004b, # LATIN CAPITAL LETTER K 0x004c: 0x004c, # LATIN CAPITAL LETTER L 0x004d: 0x004d, # LATIN CAPITAL LETTER M 0x004e: 0x004e, # LATIN CAPITAL LETTER N 0x004f: 0x004f, # LATIN CAPITAL LETTER O 0x0050: 0x0050, # LATIN CAPITAL LETTER P 0x0051: 0x0051, # LATIN CAPITAL LETTER Q 0x0052: 0x0052, # LATIN CAPITAL LETTER R 0x0053: 0x0053, # LATIN CAPITAL LETTER S 0x0054: 0x0054, # LATIN CAPITAL LETTER T 0x0055: 0x0055, # LATIN CAPITAL LETTER U 0x0056: 0x0056, # LATIN CAPITAL LETTER V 0x0057: 0x0057, # LATIN CAPITAL LETTER W 0x0058: 0x0058, # LATIN CAPITAL LETTER X 0x0059: 0x0059, # LATIN CAPITAL LETTER Y 0x005a: 0x005a, # LATIN CAPITAL LETTER Z 0x005b: 0x005b, # LEFT SQUARE BRACKET 0x005c: 0x005c, # REVERSE SOLIDUS 0x005d: 0x005d, # RIGHT SQUARE BRACKET 0x005e: 0x005e, # CIRCUMFLEX ACCENT 0x005f: 0x005f, # LOW LINE 0x0060: 0x0060, # GRAVE ACCENT 0x0061: 0x0061, # LATIN SMALL LETTER A 0x0062: 0x0062, # LATIN SMALL LETTER B 0x0063: 0x0063, # LATIN SMALL LETTER C 0x0064: 0x0064, # LATIN SMALL LETTER D 0x0065: 0x0065, # LATIN SMALL LETTER E 0x0066: 0x0066, # LATIN SMALL LETTER F 0x0067: 0x0067, # LATIN SMALL LETTER G 0x0068: 0x0068, # LATIN SMALL LETTER H 0x0069: 0x0069, # LATIN SMALL LETTER I 0x006a: 0x006a, # LATIN SMALL LETTER J 0x006b: 0x006b, # LATIN SMALL LETTER K 0x006c: 0x006c, # LATIN SMALL LETTER L 0x006d: 0x006d, # LATIN SMALL LETTER M 0x006e: 0x006e, # LATIN SMALL LETTER N 0x006f: 0x006f, # LATIN SMALL LETTER O 0x0070: 0x0070, # LATIN SMALL LETTER P 0x0071: 0x0071, # LATIN SMALL LETTER Q 0x0072: 0x0072, # LATIN SMALL LETTER R 0x0073: 0x0073, # LATIN SMALL LETTER S 0x0074: 0x0074, # LATIN SMALL LETTER T 0x0075: 0x0075, # LATIN SMALL LETTER U 0x0076: 0x0076, # LATIN SMALL LETTER V 0x0077: 0x0077, # LATIN SMALL LETTER W 0x0078: 0x0078, # LATIN SMALL LETTER X 0x0079: 0x0079, # LATIN SMALL LETTER Y 0x007a: 0x007a, # LATIN SMALL LETTER Z 0x007b: 0x007b, # LEFT CURLY BRACKET 0x007c: 0x007c, # VERTICAL LINE 0x007d: 0x007d, # RIGHT CURLY BRACKET 0x007e: 0x007e, # TILDE 0x007f: 0x007f, # DELETE 0x00a0: 0x00ff, # NO-BREAK SPACE 0x00a4: 0x00fd, # CURRENCY SIGN 0x00b7: 0x00fa, # MIDDLE DOT 0x0401: 0x00f0, # CYRILLIC CAPITAL LETTER IO 0x0404: 0x00f4, # CYRILLIC CAPITAL LETTER UKRAINIAN IE 0x0406: 0x00f6, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I 0x0407: 0x00f8, # CYRILLIC CAPITAL LETTER YI 0x0410: 0x0080, # CYRILLIC CAPITAL LETTER A 0x0411: 0x0081, # CYRILLIC CAPITAL LETTER BE 0x0412: 0x0082, # CYRILLIC CAPITAL LETTER VE 0x0413: 0x0083, # CYRILLIC CAPITAL LETTER GHE 0x0414: 0x0084, # CYRILLIC CAPITAL LETTER DE 0x0415: 0x0085, # CYRILLIC CAPITAL LETTER IE 0x0416: 0x0086, # CYRILLIC CAPITAL LETTER ZHE 0x0417: 0x0087, # CYRILLIC CAPITAL LETTER ZE 0x0418: 0x0088, # CYRILLIC CAPITAL LETTER I 0x0419: 0x0089, # CYRILLIC CAPITAL LETTER SHORT I 0x041a: 0x008a, # CYRILLIC CAPITAL LETTER KA 0x041b: 0x008b, # CYRILLIC CAPITAL LETTER EL 0x041c: 0x008c, # CYRILLIC CAPITAL LETTER EM 0x041d: 0x008d, # CYRILLIC CAPITAL LETTER EN 0x041e: 0x008e, # CYRILLIC CAPITAL LETTER O 0x041f: 0x008f, # CYRILLIC CAPITAL LETTER PE 0x0420: 0x0090, # CYRILLIC CAPITAL LETTER ER 0x0421: 0x0091, # CYRILLIC CAPITAL LETTER ES 0x0422: 0x0092, # CYRILLIC CAPITAL LETTER TE 0x0423: 0x0093, # CYRILLIC CAPITAL LETTER U 0x0424: 0x0094, # CYRILLIC CAPITAL LETTER EF 0x0425: 0x0095, # CYRILLIC CAPITAL LETTER HA 0x0426: 0x0096, # CYRILLIC CAPITAL LETTER TSE 0x0427: 0x0097, # CYRILLIC CAPITAL LETTER CHE 0x0428: 0x0098, # CYRILLIC CAPITAL LETTER SHA 0x0429: 0x0099, # CYRILLIC CAPITAL LETTER SHCHA 0x042a: 0x009a, # CYRILLIC CAPITAL LETTER HARD SIGN 0x042b: 0x009b, # CYRILLIC CAPITAL LETTER YERU 0x042c: 0x009c, # CYRILLIC CAPITAL LETTER SOFT SIGN 0x042d: 0x009d, # CYRILLIC CAPITAL LETTER E 0x042e: 0x009e, # CYRILLIC CAPITAL LETTER YU 0x042f: 0x009f, # CYRILLIC CAPITAL LETTER YA 0x0430: 0x00a0, # CYRILLIC SMALL LETTER A 0x0431: 0x00a1, # CYRILLIC SMALL LETTER BE 0x0432: 0x00a2, # CYRILLIC SMALL LETTER VE 0x0433: 0x00a3, # CYRILLIC SMALL LETTER GHE 0x0434: 0x00a4, # CYRILLIC SMALL LETTER DE 0x0435: 0x00a5, # CYRILLIC SMALL LETTER IE 0x0436: 0x00a6, # CYRILLIC SMALL LETTER ZHE 0x0437: 0x00a7, # CYRILLIC SMALL LETTER ZE 0x0438: 0x00a8, # CYRILLIC SMALL LETTER I 0x0439: 0x00a9, # CYRILLIC SMALL LETTER SHORT I 0x043a: 0x00aa, # CYRILLIC SMALL LETTER KA 0x043b: 0x00ab, # CYRILLIC SMALL LETTER EL 0x043c: 0x00ac, # CYRILLIC SMALL LETTER EM 0x043d: 0x00ad, # CYRILLIC SMALL LETTER EN 0x043e: 0x00ae, # CYRILLIC SMALL LETTER O 0x043f: 0x00af, # CYRILLIC SMALL LETTER PE 0x0440: 0x00e0, # CYRILLIC SMALL LETTER ER 0x0441: 0x00e1, # CYRILLIC SMALL LETTER ES 0x0442: 0x00e2, # CYRILLIC SMALL LETTER TE 0x0443: 0x00e3, # CYRILLIC SMALL LETTER U 0x0444: 0x00e4, # CYRILLIC SMALL LETTER EF 0x0445: 0x00e5, # CYRILLIC SMALL LETTER HA 0x0446: 0x00e6, # CYRILLIC SMALL LETTER TSE 0x0447: 0x00e7, # CYRILLIC SMALL LETTER CHE 0x0448: 0x00e8, # CYRILLIC SMALL LETTER SHA 0x0449: 0x00e9, # CYRILLIC SMALL LETTER SHCHA 0x044a: 0x00ea, # CYRILLIC SMALL LETTER HARD SIGN 0x044b: 0x00eb, # CYRILLIC SMALL LETTER YERU 0x044c: 0x00ec, # CYRILLIC SMALL LETTER SOFT SIGN 0x044d: 0x00ed, # CYRILLIC SMALL LETTER E 0x044e: 0x00ee, # CYRILLIC SMALL LETTER YU 0x044f: 0x00ef, # CYRILLIC SMALL LETTER YA 0x0451: 0x00f1, # CYRILLIC SMALL LETTER IO 0x0454: 0x00f5, # CYRILLIC SMALL LETTER UKRAINIAN IE 0x0456: 0x00f7, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I 0x0457: 0x00f9, # CYRILLIC SMALL LETTER YI 0x0490: 0x00f2, # CYRILLIC CAPITAL LETTER GHE WITH UPTURN 0x0491: 0x00f3, # CYRILLIC SMALL LETTER GHE WITH UPTURN 0x2116: 0x00fc, # NUMERO SIGN 0x221a: 0x00fb, # SQUARE ROOT 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL 0x2580: 0x00df, # UPPER HALF BLOCK 0x2584: 0x00dc, # LOWER HALF BLOCK 0x2588: 0x00db, # FULL BLOCK 0x258c: 0x00dd, # LEFT HALF BLOCK 0x2590: 0x00de, # RIGHT HALF BLOCK 0x2591: 0x00b0, # LIGHT SHADE 0x2592: 0x00b1, # MEDIUM SHADE 0x2593: 0x00b2, # DARK SHADE 0x25a0: 0x00fe, # BLACK SQUARE } zlib_codec.py 0000644 00000005750 15053241622 0007222 0 ustar 00 """ Python 'zlib_codec' Codec - zlib compression encoding Unlike most of the other codecs which target Unicode, this codec will return Python string objects for both encode and decode. Written by Marc-Andre Lemburg (mal@lemburg.com). """ import codecs import zlib # this codec needs the optional zlib module ! ### Codec APIs def zlib_encode(input,errors='strict'): """ Encodes the object input and returns a tuple (output object, length consumed). errors defines the error handling to apply. It defaults to 'strict' handling which is the only currently supported error handling for this codec. """ assert errors == 'strict' output = zlib.compress(input) return (output, len(input)) def zlib_decode(input,errors='strict'): """ Decodes the object input and returns a tuple (output object, length consumed). input must be an object which provides the bf_getreadbuf buffer slot. Python strings, buffer objects and memory mapped files are examples of objects providing this slot. errors defines the error handling to apply. It defaults to 'strict' handling which is the only currently supported error handling for this codec. """ assert errors == 'strict' output = zlib.decompress(input) return (output, len(input)) class Codec(codecs.Codec): def encode(self, input, errors='strict'): return zlib_encode(input, errors) def decode(self, input, errors='strict'): return zlib_decode(input, errors) class IncrementalEncoder(codecs.IncrementalEncoder): def __init__(self, errors='strict'): assert errors == 'strict' self.errors = errors self.compressobj = zlib.compressobj() def encode(self, input, final=False): if final: c = self.compressobj.compress(input) return c + self.compressobj.flush() else: return self.compressobj.compress(input) def reset(self): self.compressobj = zlib.compressobj() class IncrementalDecoder(codecs.IncrementalDecoder): def __init__(self, errors='strict'): assert errors == 'strict' self.errors = errors self.decompressobj = zlib.decompressobj() def decode(self, input, final=False): if final: c = self.decompressobj.decompress(input) return c + self.decompressobj.flush() else: return self.decompressobj.decompress(input) def reset(self): self.decompressobj = zlib.decompressobj() class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='zlib', encode=zlib_encode, decode=zlib_decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, _is_text_encoding=False, ) utf_32_le.py 0000644 00000001642 15053241622 0006703 0 ustar 00 """ Python 'utf-32-le' Codec """ import codecs ### Codec APIs encode = codecs.utf_32_le_encode def decode(input, errors='strict'): return codecs.utf_32_le_decode(input, errors, True) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.utf_32_le_encode(input, self.errors)[0] class IncrementalDecoder(codecs.BufferedIncrementalDecoder): _buffer_decode = codecs.utf_32_le_decode class StreamWriter(codecs.StreamWriter): encode = codecs.utf_32_le_encode class StreamReader(codecs.StreamReader): decode = codecs.utf_32_le_decode ### encodings module API def getregentry(): return codecs.CodecInfo( name='utf-32-le', encode=encode, decode=decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) raw_unicode_escape.py 0000644 00000002270 15053241622 0010736 0 ustar 00 """ Python 'raw-unicode-escape' Codec Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. """ import codecs ### Codec APIs class Codec(codecs.Codec): # Note: Binding these as C functions will result in the class not # converting them to methods. This is intended. encode = codecs.raw_unicode_escape_encode decode = codecs.raw_unicode_escape_decode class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.raw_unicode_escape_encode(input, self.errors)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.raw_unicode_escape_decode(input, self.errors)[0] class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name='raw-unicode-escape', encode=Codec.encode, decode=Codec.decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamwriter=StreamWriter, streamreader=StreamReader, ) __pycache__/raw_unicode_escape.cpython-38.pyc 0000644 00000003344 15053241622 0015227 0 ustar 00 U e5d� � @ sv d Z ddlZG dd� dej�ZG dd� dej�ZG dd� dej�ZG d d � d eej�ZG dd� deej�Zd d� ZdS )z� Python 'raw-unicode-escape' Codec Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. � Nc @ s e Zd ZejZejZdS )�CodecN)�__name__� __module__�__qualname__�codecs�raw_unicode_escape_encode�encode�raw_unicode_escape_decode�decode� r r �4/usr/lib64/python3.8/encodings/raw_unicode_escape.pyr s r c @ s e Zd Zddd�ZdS )�IncrementalEncoderFc C s t �|| j�d S �Nr )r r �errors��self�input�finalr r r r s zIncrementalEncoder.encodeN)F)r r r r r r r r r s r c @ s e Zd Zddd�ZdS )�IncrementalDecoderFc C s t �|| j�d S r )r r r r r r r r s zIncrementalDecoder.decodeN)F)r r r r r r r r r s r c @ s e Zd ZdS )�StreamWriterN�r r r r r r r r s r c @ s e Zd ZdS )�StreamReaderNr r r r r r s r c C s t jdtjtjttttd�S )Nzraw-unicode-escape)�namer r �incrementalencoder�incrementaldecoder�streamwriter�streamreader) r � CodecInfor r r r r r r r r r r �getregentry$ s �r )�__doc__r r r r r r r r r r r �<module> s __pycache__/rot_13.cpython-38.opt-1.pyc 0000644 00000005673 15053241622 0013465 0 ustar 00 U e5d� � 7 @ s d Z ddlZG dd� dej�ZG dd� dej�ZG dd� dej�ZG d d � d eej�ZG dd� deej�Zd d� Ze�e d��Z e �ddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCdD�4� dEdF� Ze dGk�rddlZeejej� dS )Hz� Python Character Mapping Codec for ROT13. This codec de/encodes from str to str. Written by Marc-Andre Lemburg (mal@lemburg.com). � Nc @ s e Zd Zddd�Zddd�ZdS ) �Codec�strictc C s t �|t�t|�fS �N��str� translate� rot13_map�len��self�input�errors� r �(/usr/lib64/python3.8/encodings/rot_13.py�encode s zCodec.encodec C s t �|t�t|�fS r r r r r r �decode s zCodec.decodeN)r )r )�__name__� __module__�__qualname__r r r r r r r s r c @ s e Zd Zddd�ZdS )�IncrementalEncoderFc C s t �|t�S r �r r r �r r �finalr r r r s zIncrementalEncoder.encodeN)F)r r r r r r r r r s r c @ s e Zd Zddd�ZdS )�IncrementalDecoderFc C s t �|t�S r r r r r r r s zIncrementalDecoder.decodeN)F)r r r r r r r r r s r c @ s e Zd ZdS )�StreamWriterN�r r r r r r r r s r c @ s e Zd ZdS )�StreamReaderNr r r r r r s r c C s"