Browse Source

base64: faster base64 decoding

- by using a lookup table instead of strchr()
- by doing full quantums first, then padding

Closes #10032
Daniel Stenberg 1 year ago
parent
commit
c6f602c93f
1 changed files with 68 additions and 52 deletions
  1. 68 52
      lib/base64.c

+ 68 - 52
lib/base64.c

@@ -51,39 +51,12 @@ static const char base64[]=
 static const char base64url[]=
   "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
 
-static size_t decodeQuantum(unsigned char *dest, const char *src)
-{
-  size_t padding = 0;
-  const char *s;
-  unsigned long i, x = 0;
-
-  for(i = 0, s = src; i < 4; i++, s++) {
-    if(*s == '=') {
-      x <<= 6;
-      padding++;
-    }
-    else {
-      const char *p = strchr(base64, *s);
-      if(p)
-        x = (x << 6) + curlx_uztoul(p - base64);
-      else
-        return 0;
-    }
-  }
-
-  if(padding < 1)
-    dest[2] = curlx_ultouc(x & 0xFFUL);
-
-  x >>= 8;
-  if(padding < 2)
-    dest[1] = curlx_ultouc(x & 0xFFUL);
-
-  x >>= 8;
-  dest[0] = curlx_ultouc(x & 0xFFUL);
-
-  return 3 - padding;
-}
-
+static const unsigned char decodetable[] =
+{ 62, 255, 255, 255, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 255,
+  255, 255, 255, 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+  17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255, 255, 255, 26, 27, 28,
+  29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+  48, 49, 50, 51 };
 /*
  * Curl_base64_decode()
  *
@@ -105,10 +78,11 @@ CURLcode Curl_base64_decode(const char *src,
   size_t padding = 0;
   size_t i;
   size_t numQuantums;
+  size_t fullQuantums;
   size_t rawlen = 0;
-  const char *padptr;
   unsigned char *pos;
   unsigned char *newstr;
+  unsigned char lookup[256];
 
   *outptr = NULL;
   *outlen = 0;
@@ -118,21 +92,18 @@ CURLcode Curl_base64_decode(const char *src,
   if(!srclen || srclen % 4)
     return CURLE_BAD_CONTENT_ENCODING;
 
-  /* Find the position of any = padding characters */
-  padptr = strchr(src, '=');
-  if(padptr) {
+  /* srclen is at least 4 here */
+  while(src[srclen - 1 - padding] == '=') {
+    /* count padding characters */
     padding++;
     /* A maximum of two = padding characters is allowed */
-    if(padptr[1] == '=')
-      padding++;
-
-    /* Check the = padding characters weren't part way through the input */
-    if(padptr + padding != src + srclen)
+    if(padding > 2)
       return CURLE_BAD_CONTENT_ENCODING;
   }
 
   /* Calculate the number of quantums */
   numQuantums = srclen / 4;
+  fullQuantums = numQuantums - (padding ? 1 : 0);
 
   /* Calculate the size of the decoded string */
   rawlen = (numQuantums * 3) - padding;
@@ -144,17 +115,59 @@ CURLcode Curl_base64_decode(const char *src,
 
   pos = newstr;
 
-  /* Decode the quantums */
-  for(i = 0; i < numQuantums; i++) {
-    size_t result = decodeQuantum(pos, src);
-    if(!result) {
-      free(newstr);
-
-      return CURLE_BAD_CONTENT_ENCODING;
+  memset(lookup, 0xff, sizeof(lookup));
+  memcpy(&lookup['+'], decodetable, sizeof(decodetable));
+  /* replaces
+  {
+    unsigned char c;
+    const unsigned char *p = (const unsigned char *)base64;
+    for(c = 0; *p; c++, p++)
+      lookup[*p] = c;
+  }
+  */
+
+  /* Decode the complete quantums first */
+  for(i = 0; i < fullQuantums; i++) {
+    unsigned char val;
+    unsigned int x = 0;
+    int j;
+
+    for(j = 0; j < 4; j++) {
+      val = lookup[(unsigned char)*src++];
+      if(val == 0xff) /* bad symbol */
+        goto bad;
+      x = (x << 6) | val;
     }
-
-    pos += result;
-    src += 4;
+    pos[2] = x & 0xff;
+    pos[1] = (x >> 8) & 0xff;
+    pos[0] = (x >> 16) & 0xff;
+    pos += 3;
+  }
+  if(padding) {
+    /* this means either 8 or 16 bits output */
+    unsigned char val;
+    unsigned int x = 0;
+    int j;
+    size_t padc = 0;
+    for(j = 0; j < 4; j++) {
+      if(*src == '=') {
+        x <<= 6;
+        src++;
+        if(++padc > padding)
+          /* this is a badly placed '=' symbol! */
+          goto bad;
+      }
+      else {
+        val = lookup[(unsigned char)*src++];
+        if(val == 0xff) /* bad symbol */
+          goto bad;
+        x = (x << 6) | val;
+      }
+    }
+    if(padding == 1)
+      pos[1] = (x >> 8) & 0xff;
+    pos[0] = (x >> 16) & 0xff;
+    pos += 3 - padding;
   }
 
   /* Zero terminate */
@@ -165,6 +178,9 @@ CURLcode Curl_base64_decode(const char *src,
   *outlen = rawlen;
 
   return CURLE_OK;
+  bad:
+  free(newstr);
+  return CURLE_BAD_CONTENT_ENCODING;
 }
 
 static CURLcode base64_encode(const char *table64,