Base32 Encoding and Decoding in Javascript

This is a simple implementation of base 32 encoding and decoding that conforms to RFC 4648.

I was doing this because I needed to spend some quality time reading technical docs, mainly so I’d get in the groove of writing better technical docs for myself and work. (This had been inspired by reading a few patents. They were painful to read – but I’ll get used to it.)

I did it mostly from the spec, but read another implementation to get a better idea of how to decode.

The main advantage of base32 is that its alphabet is case-insensitive: a computer can decode the text the same way whether it arrives in upper or lower case. An alternative, base32hex, is a similar encoding whose output sorts in the same order as the original text, so you can sort encoded strings without first decoding them.

Base32 is also designed without using 0 and 1 (zero and one) because they’re similar to “O” and “I” and lowercase “L”. (This means it’s not case-insensitive to human beings, because you can still confuse “l” and “I”.)

The code below is placed in the public domain.

Thank you for the comments finding bugs.

<html>
<body>
<script>
// Note that we assume ascii strings, not unicode.
// A better implementation should use array buffers
// of bytes, and force a conversion before executing,
// and convert outputs back into strings.
(function(exports) {
	var base32 = {
		a: "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567",
		pad: "=",
		encode: function (s) {
			var a = this.a;
			var pad = this.pad;
			var len = s.length;
			var o = "";
			var w, c, r=0, sh=0; // word, character, remainder, shift
			for(i=0; i<len; i+=5) {
				// mask top 5 bits
				c = s.charCodeAt(i);
				w = 0xf8 & c;
				o += a.charAt(w>>3);
				r = 0x07 & c;
				sh = 2;

				if ((i+1)<len) {
					c = s.charCodeAt(i+1);
					// mask top 2 bits
					w = 0xc0 & c;
					o += a.charAt((r<<2) + (w>>6));
					o += a.charAt( (0x3e & c) >> 1 );
					r = c & 0x01;
					sh = 4;
				}
				
				if ((i+2)<len) {
					c = s.charCodeAt(i+2);
					// mask top 4 bits
					w = 0xf0 & c;
					o += a.charAt((r<<4) + (w>>4));
					r = 0x0f & c;
					sh = 1;
				}

				if ((i+3)<len) {
					c = s.charCodeAt(i+3);
					// mask top 1 bit
					w = 0x80 & c;
					o += a.charAt((r<<1) + (w>>7));
					o += a.charAt((0x7c & c) >> 2);
					r = 0x03 & c;
					sh = 3;
				}

				if ((i+4)<len) {
					c = s.charCodeAt(i+4);
					// mask top 3 bits
					w = 0xe0 & c;
					o += a.charAt((r<<3) + (w>>5));
					o += a.charAt(0x1f & c);
					r = 0;
					sh = 0;
				} 
			}
			// Encode the final character.
			if (sh != 0) { o += a.charAt(r<<sh); }
			// Calculate length of pad by getting the 
			// number of words to reach an 8th octet.
			var padlen = 8 - (o.length % 8);
			// modulus 
			if (padlen==8) { return o; }
			if (padlen==1) { return o + pad; }
			if (padlen==3) { return o + pad + pad + pad; }
			if (padlen==4) { return o + pad + pad + pad + pad; }
			if (padlen==6) { return o + pad + pad + pad + pad + pad + pad; }
			console.log('there was some kind of error');
			console.log('padlen:'+padlen+' ,r:'+r+' ,sh:'+sh+', w:'+w);
		},
		decode: function(s) {
			var len = s.length;
			var apad = this.a + this.pad;
			var v,x,r=0,bits=0,c,o='';

			s = s.toUpperCase();

			for(i=0;i<len;i+=1) {
				v = apad.indexOf(s.charAt(i));
				if (v>=0 && v<32) {
					x = (x << 5) | v;
					bits += 5;
					if (bits >= 8) {
						c = (x >> (bits - 8)) & 0xff;
						o = o + String.fromCharCode(c);
						bits -= 8;
					}
				}
			}
			// remaining bits are < 8
			if (bits>0) {
				c = ((x << (8 - bits)) & 0xff) >> (8 - bits);
				// Don't append a null terminator.
				// See the comment at the top about why this sucks.
				if (c!==0) {
					o = o + String.fromCharCode(c);
				}
			}
			return o;
		}
	};

	var base32hex = {
		a: '0123456789ABCDEFGHIJKLMNOPQRSTUV',
		pad: '=',
		encode: base32.encode,
		decode: base32.decode
	};
	exports.base32 = base32;
	exports.base32hex = base32hex;
})(this.Conversions = {});

// tests
/**
 * Encodes `st` with Conversions.base32 and logs a message to the console
 * when the result differs from `expect`. Logs nothing on success.
 *
 * @param {string} st - Plain input string.
 * @param {string} expect - Expected base32 text.
 */
function testBase32Encode( st, expect ) {
	// Declared locally so no implicit global `res` is created.
	var res = Conversions.base32.encode(st);
	if (res !== expect) {
		console.log('base32.encode("' + st + '") returned ' + res + ', expected ' + expect);
	}
}
/**
 * Encodes `st` with Conversions.base32hex and logs a message to the console
 * when the result differs from `expect`. Logs nothing on success.
 *
 * @param {string} st - Plain input string.
 * @param {string} expect - Expected base32hex text.
 */
function testBase32HexEncode( st, expect ) {
	// Declared locally so no implicit global `res` is created.
	var res = Conversions.base32hex.encode(st);
	if (res !== expect) {
		console.log('base32hex.encode("' + st + '") returned ' + res + ', expected ' + expect);
	}
}
/**
 * Decodes `st` with Conversions.base32 and logs a message to the console
 * when the result differs from `expect`. Logs nothing on success.
 *
 * @param {string} st - Base32 text to decode.
 * @param {string} expect - Expected decoded string.
 */
function testBase32Decode( st, expect ) {
	// Declared locally so no implicit global `res` is created.
	var res = Conversions.base32.decode(st);
	if (res !== expect) {
		console.log('base32.decode("' + st + '") returned ' + res + ', expected ' + expect);
	}
}
/**
 * Decodes `st` with Conversions.base32hex and logs a message to the console
 * when the result differs from `expect`. Logs nothing on success.
 *
 * @param {string} st - Base32hex text to decode.
 * @param {string} expect - Expected decoded string.
 */
function testBase32HexDecode( st, expect ) {
	// Declared locally so no implicit global `res` is created.
	var res = Conversions.base32hex.decode(st);
	if (res !== expect) {
		console.log('base32hex.decode("' + st + '") returned ' + res + ', expected ' + expect);
	}
}
/**
 * Round-trip check for the standard base32 alphabet: `original` is expected
 * to encode to `encoded`, and `encoded` is expected to decode to `original`.
 *
 * @param {string} original - Plain string.
 * @param {string} encoded - Its expected base32 form.
 */
function test32(original, encoded) {
	// Forward direction first ...
	testBase32Encode(original, encoded);
	// ... then the inverse, with the arguments swapped.
	testBase32Decode(encoded, original);
}
/**
 * Round-trip check for the base32hex alphabet: `original` is expected to
 * encode to `encoded`, and `encoded` is expected to decode to `original`.
 *
 * @param {string} original - Plain string.
 * @param {string} encoded - Its expected base32hex form.
 */
function test32Hex(original, encoded) {
	// Forward direction first ...
	testBase32HexEncode(original, encoded);
	// ... then the inverse, with the arguments swapped.
	testBase32HexDecode(encoded, original);
}

// Round-trip vectors: each call checks encode(original) and decode(encoded).
// A failing check logs a message to the console; passing checks are silent.
// Standard alphabet.
test32('foo', 'MZXW6===');
test32('foob', 'MZXW6YQ=');
test32('fooba', 'MZXW6YTB');
test32('foobar', 'MZXW6YTBOI======');
// base32hex alphabet, including the empty string.
test32Hex('', '');
test32Hex('f', "CO======");
test32Hex('fo', "CPNG====");
test32Hex('foo', "CPNMU===");
test32Hex('foob', "CPNMUOG=");
test32Hex('fooba', "CPNMUOJ1");
test32Hex('foobar', "CPNMUOJ1E8======");
// Last-character bug: a final symbol whose value is 0 must still be emitted.
test32('100', 'GEYDA==='); // 49 48 48 = 00110001 00110000 00110000 -> 00110 00100 11000 00011 0000- -> 6 4 24 3 0
// Last-character tests.
// How 1 to 4 input bytes split into 5-bit groups ('-' marks a zero-filled bit):
// 00000000 -> 00000 000--
// 00000000 00000000 -> 00000 00000 00000 0----
// 00000000 00000000 00000000 -> 00000 00000 00000 00000 0000-
// 00000000 00000000 00000000 00000000 -> 00000 00000 00000 00000 00000 00000 00---
// 1 char
//   final symbol is all zeros
test32( 'x', 'PA======' ); // 01111000 -> 01111 000-- -> 15 0
//   final symbol is not zero
test32( 'w', 'O4======' ); // 01110111 -> 01110 111-- -> 14 28
// 2 chars
//   final symbol is all zeros
test32( 'wj', 'O5VA====' ); // 01110111 01101010 -> 01110 11101 10101 0---- -> 14 29 21 0
//   final symbol is not zero
test32( 'wk', 'O5VQ====' ); // 01110111 01101011 -> 01110 11101 10101 1---- -> 14 29 21 16
// 3 chars
//   final symbol is all zeros
test32( 'wkp', 'O5VXA===' ); // 01110111 01101011 01110000 -> 01110 11101 10101 10111 0000- -> 14 29 21 23 0
//   final symbol is not zero
test32( 'wko', 'O5VW6===' ); // 01110111 01101011 01101111 -> 01110 11101 10101 10110 1111- -> 14 29 21 22 30
// 4 chars
//   final symbol is all zeros
test32( 'wkpt', 'O5VXA5A=' ); // 01110111 01101011 01110000 01110100 -> 01110 11101 10101 10111 00000 11101 00--- -> 14 29 21 23 0 29 0
//   final symbol is not zero
test32( 'wkps', 'O5VXA4Y=' ); // 01110111 01101011 01110000 01110011 -> 01110 11101 10101 10111 00000 11100 11--- -> 14 29 21 23 0 28 24

</script>
</body>
</html>

Leave a Reply to fireattack (Cancel reply)

fireattack

fireattack

There is an error.

It should be

if (v >= 0 && v < 32) {

Otherwise the first encoding in the table (A) will be ignored when decoding.

Rob

Rob

It seems there is an error:

Conversions.base32.encode("100")
"GEYD===="

should be
GEYDA===

johnk

johnk

Thank you. I found the logic error. I’ll post a correction soon.