Simplify the AMD-specific codepath.

See TODO comment being removed.

Change-Id: I92ce7018f88c24b3e2e61441397fda36b977d3b8
Reviewed-on: https://boringssl-review.googlesource.com/5435
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/cpu-intel.c b/crypto/cpu-intel.c
index 6943999..a50d958 100644
--- a/crypto/cpu-intel.c
+++ b/crypto/cpu-intel.c
@@ -63,7 +63,6 @@
 
 #if !defined(OPENSSL_NO_ASM) && (defined(OPENSSL_X86) || defined(OPENSSL_X86_64))
 
-#include <assert.h>
 #include <inttypes.h>
 #include <stdlib.h>
 #include <stdio.h>
@@ -154,13 +153,12 @@
                ecx == 0x444d4163 /* cAMD */;
 
   int has_amd_xop = 0;
-  uint32_t num_amd_extended_ids = 0;
   if (is_amd) {
     /* AMD-specific logic.
      * See http://developer.amd.com/wordpress/media/2012/10/254811.pdf */
     OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 0x80000000);
-    num_amd_extended_ids = eax;
-    if (num_amd_extended_ids >= 0x80000001) {
+    uint32_t num_extended_ids = eax;
+    if (num_extended_ids >= 0x80000001) {
       OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 0x80000001);
       if (ecx & (1 << 11)) {
         has_amd_xop = 1;
@@ -174,47 +172,27 @@
     extended_features = ebx;
   }
 
-  /* Query the main feature flags and adjust the hyper-threading bit.
-   *
-   * TODO(davidben): Can the AMD half of logic be trimmed down? I haven't found
-   * evidence that any current AMD CPUs share an L1 data cache between threads,
-   * and the CPUID manual (see section 3) suggests this code will always clear
-   * HTT on AMD. Only aes-586.pl queries this, so hopefully any future CPUs will
-   * use better implementations anyway. */
-  if (num_amd_extended_ids >= 0x80000008) {
-    /* See AMD CPUID manual (2.34), page 27. */
-    assert(is_amd);
-    OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 0x80000008);
-    uint32_t num_physical_cores = 1 + (ecx & 0xff);
+  /* Determine the number of cores sharing an L1 data cache to adjust the
+   * hyper-threading bit. */
+  uint32_t cores_per_cache = 0;
+  if (is_amd) {
+    /* AMD CPUs never share an L1 data cache between threads but do set the HTT
+     * bit on multi-core CPUs. */
+    cores_per_cache = 1;
+  } else if (num_ids >= 4) {
+    /* TODO(davidben): The Intel manual says this CPUID leaf enumerates all
+     * caches using ECX and doesn't say which is first. Does this matter? */
+    OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 4);
+    cores_per_cache = 1 + ((eax >> 14) & 0xfff);
+  }
 
-    OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 1);
+  OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 1);
 
-    /* Correct the hyper-threading bit. */
-    if (edx & (1 << 28)) {
-      /* See AMD CPUID manual (2.34), pages 11 and 13. */
-      uint32_t num_logical_cores = (ebx >> 16) & 0xff;
-      if (num_logical_cores <= num_physical_cores) {
-        edx &= ~(1 << 28);
-      }
-    }
-  } else {
-    uint32_t cores_per_cache = 0;
-    if (num_ids >= 4) {
-      /* TODO(davidben): The Intel manual says this CPUID leaf enumerates all
-       * caches using ECX and doesn't say which is first. Does this matter? */
-      OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 4);
-      cores_per_cache = 1 + ((eax >> 14) & 0xfff);
-    }
-
-    OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 1);
-
-    /* Correct the hyper-threading bit if the data cache isn't shared between
-     * logical cores. */
-    if (edx & (1 << 28)) {
-      uint32_t num_logical_cores = (ebx >> 16) & 0xff;
-      if (cores_per_cache == 1 || num_logical_cores <= 1) {
-        edx &= ~(1 << 28);
-      }
+  /* Adjust the hyper-threading bit. */
+  if (edx & (1 << 28)) {
+    uint32_t num_logical_cores = (ebx >> 16) & 0xff;
+    if (cores_per_cache == 1 || num_logical_cores <= 1) {
+      edx &= ~(1 << 28);
     }
   }