#!/usr/bin/env perl
#
# Generates the x86-64 assembly for OPENSSL_ia32_cpuid by piping perlasm
# source through x86_64-xlate.pl.
#
# Arguments (both optional, positional):
#   flavour  - assembler flavour handed to x86_64-xlate.pl; a flavour matching
#              nasm/masm/mingw64 selects the Win64 argument registers.
#   output   - output file name handed to x86_64-xlate.pl; a name ending in
#              ".asm" also selects Win64.
# If the first argument contains a '.', it is taken to be the output file and
# the flavour is left undefined.
#
# The generated routine takes one pointer argument and stores three 32-bit
# words through it: word 0 is the adjusted CPUID.1:EDX, word 1 is the adjusted
# CPUID.1:ECX (with the AMD XOP flag merged in), and word 2 is CPUID.7:EBX
# (left zero when leaf 7 is not queried).

$flavour = shift;
$output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Look for the translator next to this script, then in a perlasm/ subdirectory.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Everything printed below is fed to the translator. The translator path is
# quoted so a script directory containing spaces still works, and a failure to
# start the child is fatal instead of being silently ignored.
open OUT,"| \"$^X\" \"$xlate\" $flavour $output"
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") :	# Win64 order
				 ("%rdi","%rsi","%rdx","%rcx");	# Unix order

print<<___;
.text

.globl	OPENSSL_ia32_cpuid
.type	OPENSSL_ia32_cpuid,\@function,1
.align	16
OPENSSL_ia32_cpuid:
	# On Windows, $arg1 is rcx, but that will be clobbered. So make Windows
	# use the same register as Unix.
	mov	$arg1,%rdi
	mov	%rbx,%r8		# save %rbx

	xor	%eax,%eax
	mov	%eax,8(%rdi)		# clear 3rd word
	cpuid
	mov	%eax,%r11d		# max value for standard query level

	xor	%eax,%eax
	cmp	\$0x756e6547,%ebx	# "Genu"
	setne	%al
	mov	%eax,%r9d
	cmp	\$0x49656e69,%edx	# "ineI"
	setne	%al
	or	%eax,%r9d
	cmp	\$0x6c65746e,%ecx	# "ntel"
	setne	%al
	or	%eax,%r9d		# 0 indicates Intel CPU
	jz	.Lintel

	cmp	\$0x68747541,%ebx	# "Auth"
	setne	%al
	mov	%eax,%r10d
	cmp	\$0x69746E65,%edx	# "enti"
	setne	%al
	or	%eax,%r10d
	cmp	\$0x444D4163,%ecx	# "cAMD"
	setne	%al
	or	%eax,%r10d		# 0 indicates AMD CPU
	jnz	.Lintel

	# AMD specific
	# See http://developer.amd.com/wordpress/media/2012/10/254811.pdf (1)

	mov	\$0x80000000,%eax
	cpuid
	# Returns "The largest CPUID extended function input value supported by
	# the processor implementation." in EAX.
	cmp	\$0x80000001,%eax
	jb	.Lintel
	mov	%eax,%r10d
	mov	\$0x80000001,%eax
	cpuid
	# Returns feature bits in ECX. See page 20 of [1].
	# %r9d still holds the non-Intel flag in bit 0 (set above), so this is
	# an OR rather than a MOV: the mask below keeps bit 0 together with the
	# XOP bit, and the later "cmp 0,%r9d" Intel check relies on it.
	or	%ecx,%r9d
	and	\$0x00000801,%r9d	# keep non-Intel flag (bit 0) and AMD XOP bit, 1<<11

	cmp	\$0x80000008,%r10d
	jb	.Lintel

	mov	\$0x80000008,%eax
	cpuid
	# Returns APIC ID and number of cores in ECX. See page 27 of [1].
	movzb	%cl,%r10		# number of cores - 1
	inc	%r10			# number of cores

	mov	\$1,%eax
	cpuid
	# See page 13 of [1].
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	shr	\$16,%ebx		# number of logical processors
	cmp	%r10b,%bl
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# Clear hyper-threading bit.
	jmp	.Lgeneric

.Lintel:
	cmp	\$4,%r11d
	mov	\$-1,%r10d
	jb	.Lnocacheinfo

	mov	\$4,%eax
	mov	\$0,%ecx		# query L1D
	cpuid
	mov	%eax,%r10d
	shr	\$14,%r10d
	and	\$0xfff,%r10d		# number of cores -1 per L1D

	cmp	\$7,%r11d
	jb	.Lnocacheinfo

	mov	\$7,%eax
	xor	%ecx,%ecx
	cpuid
	mov	%ebx,8(%rdi)

.Lnocacheinfo:
	mov	\$1,%eax
	cpuid
	# Gets feature information. See table 3-21 in the Intel manual.
	and	\$0xbfefffff,%edx	# force reserved bits to 0
	cmp	\$0,%r9d
	jne	.Lnotintel
	or	\$0x40000000,%edx	# set reserved bit#30 on Intel CPUs
	and	\$15,%ah
	cmp	\$15,%ah		# examine Family ID
	jne	.Lnotintel
	or	\$0x00100000,%edx	# set reserved bit#20 to engage RC4_CHAR
.Lnotintel:
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28) - clear hyper-threading.
	cmp	\$0,%r10d
	je	.Lgeneric

	or	\$0x10000000,%edx	# 1<<28
	shr	\$16,%ebx
	cmp	\$1,%bl			# see if cache is shared
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
.Lgeneric:
	and	\$0x00000800,%r9d	# isolate AMD XOP flag
	and	\$0xfffff7ff,%ecx
	or	%ecx,%r9d		# merge AMD XOP flag

	mov	%edx,%r10d		# %r9d:%r10d is copy of %ecx:%edx
	bt	\$27,%r9d		# check OSXSAVE bit
	jnc	.Lclear_avx
	xor	%ecx,%ecx		# XCR0
	.byte	0x0f,0x01,0xd0		# xgetbv
	and	\$6,%eax		# isolate XMM and YMM state support
	cmp	\$6,%eax
	je	.Ldone
.Lclear_avx:
	mov	\$0xefffe7ff,%eax	# ~(1<<28|1<<12|1<<11)
	and	%eax,%r9d		# clear AVX, FMA and AMD XOP bits
	andl	\$0xffffffdf,8(%rdi)	# clear AVX2, ~(1<<5)
.Ldone:
	movl	%r9d,4(%rdi)
	movl	%r10d,0(%rdi)
	mov	%r8,%rbx		# restore %rbx
	ret
.size	OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid

___

# Closing the pipe flushes the output and waits for the translator; a false
# return means a write error or a non-zero exit status from the child, so
# treat it as fatal rather than leaving a truncated output file unnoticed.
close STDOUT or die "error closing STDOUT: $!";