1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 | Lib/base64.py
#! /usr/bin/env python """RFC 3548: Base16, Base32, Base64 Data Encodings""" # Modified 04-Oct-1995 by Jack Jansen to use binascii module # Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support import re import struct import string import binascii __all__ = [ # Legacy interface exports traditional RFC 1521 Base64 encodings 'encode', 'decode', 'encodestring', 'decodestring', # Generalized interface for other encodings 'b64encode', 'b64decode', 'b32encode', 'b32decode', 'b16encode', 'b16decode', # Standard Base64 encoding 'standard_b64encode', 'standard_b64decode', # Some common Base64 alternatives. As referenced by RFC 3458, see thread # starting at: # # http://zgp.org/pipermail/p2p-hackers/2001-September/000316.html 'urlsafe_b64encode', 'urlsafe_b64decode', ] _translation = [chr(_x) for _x in range(256)] EMPTYSTRING = '' def _translate(s, altchars): translation = _translation[:] for k, v in altchars.items(): translation[ord(k)] = v return s.translate(''.join(translation)) # Base64 encoding/decoding uses binascii def b64encode(s, altchars=None): """Encode a string using Base64. s is the string to encode. Optional altchars must be a string of at least length 2 (additional characters are ignored) which specifies an alternative alphabet for the '+' and '/' characters. This allows an application to e.g. generate url or filesystem safe Base64 strings. The encoded string is returned. """ # Strip off the trailing newline encoded = binascii.b2a_base64(s)[:-1] if altchars is not None: return encoded.translate(string.maketrans(b'+/', altchars[:2])) return encoded def b64decode(s, altchars=None): """Decode a Base64 encoded string. s is the string to decode. Optional altchars must be a string of at least length 2 (additional characters are ignored) which specifies the alternative alphabet used instead of the '+' and '/' characters. The decoded string is returned. A TypeError is raised if s were incorrectly padded or if there are non-alphabet characters present in the string. """ if altchars is not None: s = s.translate(string.maketrans(altchars[:2], '+/')) try: return binascii.a2b_base64(s) except binascii.Error, msg: # Transform this exception for consistency raise TypeError(msg) def standard_b64encode(s): """Encode a string using the standard Base64 alphabet. s is the string to encode. The encoded string is returned. """ return b64encode(s) def standard_b64decode(s): """Decode a string encoded with the standard Base64 alphabet. s is the string to decode. The decoded string is returned. A TypeError is raised if the string is incorrectly padded or if there are non-alphabet characters present in the string. """ return b64decode(s) _urlsafe_encode_translation = string.maketrans(b'+/', b'-_') _urlsafe_decode_translation = string.maketrans(b'-_', b'+/') def urlsafe_b64encode(s): """Encode a string using a url-safe Base64 alphabet. s is the string to encode. The encoded string is returned. The alphabet uses '-' instead of '+' and '_' instead of '/'. """ return b64encode(s).translate(_urlsafe_encode_translation) def urlsafe_b64decode(s): """Decode a string encoded with the standard Base64 alphabet. s is the string to decode. The decoded string is returned. A TypeError is raised if the string is incorrectly padded or if there are non-alphabet characters present in the string. The alphabet uses '-' instead of '+' and '_' instead of '/'. """ return b64decode(s.translate(_urlsafe_decode_translation)) # Base32 encoding/decoding must be done in Python _b32alphabet = { 0: 'A', 9: 'J', 18: 'S', 27: '3', 1: 'B', 10: 'K', 19: 'T', 28: '4', 2: 'C', 11: 'L', 20: 'U', 29: '5', 3: 'D', 12: 'M', 21: 'V', 30: '6', 4: 'E', 13: 'N', 22: 'W', 31: '7', 5: 'F', 14: 'O', 23: 'X', 6: 'G', 15: 'P', 24: 'Y', 7: 'H', 16: 'Q', 25: 'Z', 8: 'I', 17: 'R', 26: '2', } _b32tab = _b32alphabet.items() _b32tab.sort() _b32tab = [v for k, v in _b32tab] _b32rev = dict([(v, long(k)) for k, v in _b32alphabet.items()]) def b32encode(s): """Encode a string using Base32. s is the string to encode. The encoded string is returned. """ parts = [] quanta, leftover = divmod(len(s), 5) # Pad the last quantum with zero bits if necessary if leftover: s += ('\0' * (5 - leftover)) quanta += 1 for i in range(quanta): # c1 and c2 are 16 bits wide, c3 is 8 bits wide. The intent of this # code is to process the 40 bits in units of 5 bits. So we take the 1 # leftover bit of c1 and tack it onto c2. Then we take the 2 leftover # bits of c2 and tack them onto c3. The shifts and masks are intended # to give us values of exactly 5 bits in width. c1, c2, c3 = struct.unpack('!HHB', s[i*5:(i+1)*5]) c2 += (c1 & 1) << 16 # 17 bits wide c3 += (c2 & 3) << 8 # 10 bits wide parts.extend([_b32tab[c1 >> 11], # bits 1 - 5 _b32tab[(c1 >> 6) & 0x1f], # bits 6 - 10 _b32tab[(c1 >> 1) & 0x1f], # bits 11 - 15 _b32tab[c2 >> 12], # bits 16 - 20 (1 - 5) _b32tab[(c2 >> 7) & 0x1f], # bits 21 - 25 (6 - 10) _b32tab[(c2 >> 2) & 0x1f], # bits 26 - 30 (11 - 15) _b32tab[c3 >> 5], # bits 31 - 35 (1 - 5) _b32tab[c3 & 0x1f], # bits 36 - 40 (1 - 5) ]) encoded = EMPTYSTRING.join(parts) # Adjust for any leftover partial quanta if leftover == 1: return encoded[:-6] + '======' elif leftover == 2: return encoded[:-4] + '====' elif leftover == 3: return encoded[:-3] + '===' elif leftover == 4: return encoded[:-1] + '=' return encoded def b32decode(s, casefold=False, map01=None): """Decode a Base32 encoded string. s is the string to decode. Optional casefold is a flag specifying whether a lowercase alphabet is acceptable as input. For security purposes, the default is False. RFC 3548 allows for optional mapping of the digit 0 (zero) to the letter O (oh), and for optional mapping of the digit 1 (one) to either the letter I (eye) or letter L (el). The optional argument map01 when not None, specifies which letter the digit 1 should be mapped to (when map01 is not None, the digit 0 is always mapped to the letter O). For security purposes the default is None, so that 0 and 1 are not allowed in the input. The decoded string is returned. A TypeError is raised if s were incorrectly padded or if there are non-alphabet characters present in the string. """ quanta, leftover = divmod(len(s), 8) if leftover: raise TypeError('Incorrect padding') # Handle section 2.4 zero and one mapping. The flag map01 will be either # False, or the character to map the digit 1 (one) to. It should be # either L (el) or I (eye). if map01: s = s.translate(string.maketrans(b'01', b'O' + map01)) if casefold: s = s.upper() # Strip off pad characters from the right. We need to count the pad # characters because this will tell us how many null bytes to remove from # the end of the decoded string. padchars = 0 mo = re.search('(?P<pad>[=]*)$', s) if mo: padchars = len(mo.group('pad')) if padchars > 0: s = s[:-padchars] # Now decode the full quanta parts = [] acc = 0 shift = 35 for c in s: val = _b32rev.get(c) if val is None: raise TypeError('Non-base32 digit found') acc += _b32rev[c] << shift shift -= 5 if shift < 0: parts.append(binascii.unhexlify('%010x' % acc)) acc = 0 shift = 35 # Process the last, partial quanta last = binascii.unhexlify('%010x' % acc) if padchars == 0: last = '' # No characters elif padchars == 1: last = last[:-1] elif padchars == 3: last = last[:-2] elif padchars == 4: last = last[:-3] elif padchars == 6: last = last[:-4] else: raise TypeError('Incorrect padding') parts.append(last) return EMPTYSTRING.join(parts) # RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns # lowercase. The RFC also recommends against accepting input case # insensitively. def b16encode(s): """Encode a string using Base16. s is the string to encode. The encoded string is returned. """ return binascii.hexlify(s).upper() def b16decode(s, casefold=False): """Decode a Base16 encoded string. s is the string to decode. Optional casefold is a flag specifying whether a lowercase alphabet is acceptable as input. For security purposes, the default is False. The decoded string is returned. A TypeError is raised if s were incorrectly padded or if there are non-alphabet characters present in the string. """ if casefold: s = s.upper() if re.search('[^0-9A-F]', s): raise TypeError('Non-base16 digit found') return binascii.unhexlify(s) # Legacy interface. This code could be cleaned up since I don't believe # binascii has any line length limitations. It just doesn't seem worth it # though. MAXLINESIZE = 76 # Excluding the CRLF MAXBINSIZE = (MAXLINESIZE//4)*3 def encode(input, output): """Encode a file.""" while True: s = input.read(MAXBINSIZE) if not s: break while len(s) < MAXBINSIZE: ns = input.read(MAXBINSIZE-len(s)) if not ns: break s += ns line = binascii.b2a_base64(s) output.write(line) def decode(input, output): """Decode a file.""" while True: line = input.readline() if not line: break s = binascii.a2b_base64(line) output.write(s) def encodestring(s): """Encode a string into multiple lines of base-64 data.""" pieces = [] for i in range(0, len(s), MAXBINSIZE): chunk = s[i : i + MAXBINSIZE] pieces.append(binascii.b2a_base64(chunk)) return "".join(pieces) def decodestring(s): """Decode a string.""" return binascii.a2b_base64(s) # Useable as a script... def test(): """Small test program""" import sys, getopt try: opts, args = getopt.getopt(sys.argv[1:], 'deut') except getopt.error, msg: sys.stdout = sys.stderr print msg print """usage: %s [-d|-e|-u|-t] [file|-] -d, -u: decode -e: encode (default) -t: encode and decode string 'Aladdin:open sesame'"""%sys.argv[0] sys.exit(2) func = encode for o, a in opts: if o == '-e': func = encode if o == '-d': func = decode if o == '-u': func = decode if o == '-t': test1(); return if args and args[0] != '-': with open(args[0], 'rb') as f: func(f, sys.stdout) else: func(sys.stdin, sys.stdout) def test1(): s0 = "Aladdin:open sesame" s1 = encodestring(s0) s2 = decodestring(s1) print s0, repr(s1), s2 if __name__ == '__main__': test() |