Mind the end of the buffer in aligned case of generic RC4 implementation.

The generic RC4 implementation may read and write just past the end of the
buffer: when input and output are aligned, it always reads a whole RC4_CHUNK at
a time. It appropriately masks off and preserves the excess bytes past the end,
so this can only have practical effects if the access crosses a page boundary.
That cannot happen, because there is an alignment check and page boundaries are
always aligned. Still, it makes ASan unhappy and, strictly speaking, is a
memory error.

Instead, fall through to the generic codepath, which just processes the
remaining bytes one at a time. This should fix the other bot failure.

Change-Id: I3cbd3bfc6cb0537e87f3252dea12d40ffa78d590
Reviewed-on: https://boringssl-review.googlesource.com/4722
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/rc4/rc4.c b/crypto/rc4/rc4.c
index 2a98fd0..aa19dc2 100644
--- a/crypto/rc4/rc4.c
+++ b/crypto/rc4/rc4.c
@@ -141,37 +141,6 @@
         in += sizeof(RC4_CHUNK);
         out += sizeof(RC4_CHUNK);
       }
-      if (len) {
-        RC4_CHUNK mask = (RC4_CHUNK) - 1, ochunk;
-
-        ichunk = *(RC4_CHUNK *)in;
-        ochunk = *(RC4_CHUNK *)out;
-        otp = 0;
-        i = BESHFT(0);
-        mask <<= (sizeof(RC4_CHUNK) - len) << 3;
-        switch (len & (sizeof(RC4_CHUNK) - 1)) {
-          case 7:
-            otp = RC4_STEP << i, i -= 8;
-          case 6:
-            otp |= RC4_STEP << i, i -= 8;
-          case 5:
-            otp |= RC4_STEP << i, i -= 8;
-          case 4:
-            otp |= RC4_STEP << i, i -= 8;
-          case 3:
-            otp |= RC4_STEP << i, i -= 8;
-          case 2:
-            otp |= RC4_STEP << i, i -= 8;
-          case 1:
-            otp |= RC4_STEP << i, i -= 8;
-        }
-        ochunk &= ~mask;
-        ochunk |= (otp ^ ichunk) & mask;
-        *(RC4_CHUNK *)out = ochunk;
-      }
-      key->x = x;
-      key->y = y;
-      return;
     } else { /* LITTLE-ENDIAN CASE */
 #define LESHFT(c) (((c) * 8) & (sizeof(RC4_CHUNK) * 8 - 1))
       for (; len & (0 - sizeof(RC4_CHUNK)); len -= sizeof(RC4_CHUNK)) {
@@ -190,37 +159,6 @@
         in += sizeof(RC4_CHUNK);
         out += sizeof(RC4_CHUNK);
       }
-      if (len) {
-        RC4_CHUNK mask = (RC4_CHUNK) - 1, ochunk;
-
-        ichunk = *(RC4_CHUNK *)in;
-        ochunk = *(RC4_CHUNK *)out;
-        otp = 0;
-        i = 0;
-        mask >>= (sizeof(RC4_CHUNK) - len) << 3;
-        switch (len & (sizeof(RC4_CHUNK) - 1)) {
-          case 7:
-            otp = RC4_STEP, i += 8;
-          case 6:
-            otp |= RC4_STEP << i, i += 8;
-          case 5:
-            otp |= RC4_STEP << i, i += 8;
-          case 4:
-            otp |= RC4_STEP << i, i += 8;
-          case 3:
-            otp |= RC4_STEP << i, i += 8;
-          case 2:
-            otp |= RC4_STEP << i, i += 8;
-          case 1:
-            otp |= RC4_STEP << i, i += 8;
-        }
-        ochunk &= ~mask;
-        ochunk |= (otp ^ ichunk) & mask;
-        *(RC4_CHUNK *)out = ochunk;
-      }
-      key->x = x;
-      key->y = y;
-      return;
     }
   }
 #define LOOP(in, out)   \