#!/usr/bin/env perl
#
# Perlasm generator for the x86-64 CPUID probe OPENSSL_ia32_cpuid.
#
# Usage: <script> [flavour] [output]
#   flavour  passed through to x86_64-xlate.pl; a value matching nasm, masm
#            or mingw64 selects the Win64 argument registers below.
#   output   passed through to x86_64-xlate.pl; a name ending in .asm also
#            selects the Win64 argument registers.
# If the first argument contains a dot it is taken to be the output file and
# no flavour is passed on.

$flavour = shift;
$output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Look for the translator next to this script first, then in a perlasm/
# subdirectory of the script's directory.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Everything printed to STDOUT from here on is piped through the translator.
# The translator path is quoted so that a directory name containing spaces
# still works, and a failure to start the pipe is fatal instead of being
# silently ignored.
open OUT,"| \"$^X\" \"$xlate\" $flavour $output"
	or die "can't call $xlate: $!";
*STDOUT=*OUT;

# Registers carrying the first four integer arguments for the selected ABI.
($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") :	# Win64 order
				 ("%rdi","%rsi","%rdx","%rcx");	# Unix order

# OPENSSL_ia32_cpuid
#
# Emits one function taking a single argument: a pointer to an array of
# 32-bit words. Three words (12 bytes) are written:
#   0(out)  EDX of CPUID leaf 1, with bits 20 and 30 repurposed (see the
#           inline comments) and the hyper-threading bit (28) adjusted
#   4(out)  ECX of CPUID leaf 1, with bit 11 replaced by the AMD XOP flag;
#           the AVX, FMA and XOP bits are cleared unless XCR0 reports both
#           XMM and YMM state enabled
#   8(out)  EBX of CPUID leaf 7 (sub-leaf 0), or 0 when leaf 7 is not
#           available; the AVX2 bit is cleared together with AVX above
# No return value is produced on purpose; %eax holds scratch data at ret.
#
# Register roles inside the function:
#   %rdi   output pointer
#   %r8    saved copy of %rbx (cpuid overwrites %ebx)
#   %r9d   vendor flag, 0 = Intel, non-zero otherwise (bit 11 additionally
#          carries AMD XOP); from .Lgeneric on, the word stored at 4(out)
#   %r10d  scratch for the AMD vendor check, then core-count information,
#          and from .Lgeneric on the word stored at 0(out)
#   %r11d  highest supported standard CPUID leaf
#
# NOTE(review): %rdi is callee-saved in the Microsoft x64 convention; this
# code presumably relies on x86_64-xlate.pl's handling of the @function,1
# annotation to save and restore it on Win64 -- confirm against the
# translator.
print<<___;
.text

.globl	OPENSSL_ia32_cpuid
.type	OPENSSL_ia32_cpuid,\@function,1
.align	16
OPENSSL_ia32_cpuid:
	# On Windows the first argument arrives in rcx, which cpuid clobbers,
	# so keep the output pointer in rdi on every platform.
	mov	$arg1,%rdi
	mov	%rbx,%r8		# save %rbx

	xor	%eax,%eax
	mov	%eax,8(%rdi)		# clear 3rd word
	cpuid
	mov	%eax,%r11d		# max value for standard query level

	xor	%eax,%eax
	cmp	\$0x756e6547,%ebx	# "Genu"
	setne	%al
	mov	%eax,%r9d
	cmp	\$0x49656e69,%edx	# "ineI"
	setne	%al
	or	%eax,%r9d
	cmp	\$0x6c65746e,%ecx	# "ntel"
	setne	%al
	or	%eax,%r9d		# 0 indicates Intel CPU
	jz	.Lintel

	cmp	\$0x68747541,%ebx	# "Auth"
	setne	%al
	mov	%eax,%r10d
	cmp	\$0x69746E65,%edx	# "enti"
	setne	%al
	or	%eax,%r10d
	cmp	\$0x444D4163,%ecx	# "cAMD"
	setne	%al
	or	%eax,%r10d		# 0 indicates AMD CPU
	jnz	.Lintel

	# AMD specific
	# See http://developer.amd.com/wordpress/media/2012/10/254811.pdf [1]

	mov	\$0x80000000,%eax
	cpuid
	# Returns "The largest CPUID extended function input value supported by
	# the processor implementation." in EAX.
	cmp	\$0x80000001,%eax
	jb	.Lintel
	mov	%eax,%r10d
	mov	\$0x80000001,%eax
	cpuid
	# Returns feature bits in ECX. See page 20 of [1].
	# This has to be an OR rather than a MOV: %r9d is 1 here (the vendor
	# is not Intel) and bit 0 must survive, because the .Lintel path may
	# still be taken below and it treats a zero %r9d as "Intel CPU".
	or	%ecx,%r9d
	and	\$0x00000801,%r9d	# keep non-Intel flag (bit 0) and AMD XOP bit, 1<<11

	cmp	\$0x80000008,%r10d
	jb	.Lintel

	mov	\$0x80000008,%eax
	cpuid
	# Returns APIC ID and number of cores in ECX. See page 27 of [1].
	movzb	%cl,%r10		# number of cores - 1
	inc	%r10			# number of cores

	mov	\$1,%eax
	cpuid
	# See page 13 of [1].
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	shr	\$16,%ebx		# number of logical processors
	cmp	%r10b,%bl
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# Clear hyper-threading bit.
	jmp	.Lgeneric

.Lintel:
	cmp	\$4,%r11d
	mov	\$-1,%r10d		# mov leaves the flags from cmp intact
	jb	.Lnocacheinfo

	mov	\$4,%eax
	xor	%ecx,%ecx		# query L1D
	cpuid
	mov	%eax,%r10d
	shr	\$14,%r10d
	and	\$0xfff,%r10d		# number of cores -1 per L1D

.Lnocacheinfo:
	mov	\$1,%eax
	cpuid
	# Gets feature information. See table 3-21 in the Intel manual.
	and	\$0xbfefffff,%edx	# force reserved bits to 0
	test	%r9d,%r9d
	jne	.Lnotintel
	or	\$0x40000000,%edx	# set reserved bit#30 on Intel CPUs
	and	\$15,%ah
	cmp	\$15,%ah		# examine Family ID
	jne	.Lnotintel
	or	\$0x00100000,%edx	# set reserved bit#20 to engage RC4_CHAR
.Lnotintel:
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28) - clear hyper-threading.
	test	%r10d,%r10d
	je	.Lgeneric

	or	\$0x10000000,%edx	# 1<<28
	shr	\$16,%ebx
	cmp	\$1,%bl			# see if cache is shared
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
.Lgeneric:
	and	\$0x00000800,%r9d	# isolate AMD XOP flag
	and	\$0xfffff7ff,%ecx
	or	%ecx,%r9d		# merge AMD XOP flag

	mov	%edx,%r10d		# %r9d:%r10d is copy of %ecx:%edx

	# Query leaf 7 here, on the path shared by every vendor, so that AMD
	# CPUs which jump straight to .Lgeneric also get their extended
	# feature flags stored. All leaf 1 results are already held in
	# %r9d and %r10d, so cpuid may overwrite eax, ebx, ecx and edx.
	cmp	\$7,%r11d
	jb	.Lno_extended_info
	mov	\$7,%eax
	xor	%ecx,%ecx		# sub-leaf 0
	cpuid
	mov	%ebx,8(%rdi)		# save extended feature flags
.Lno_extended_info:

	bt	\$27,%r9d		# check OSXSAVE bit
	jnc	.Lclear_avx
	xor	%ecx,%ecx		# XCR0
	.byte	0x0f,0x01,0xd0		# xgetbv
	and	\$6,%eax		# isolate XMM and YMM state support
	cmp	\$6,%eax
	je	.Ldone
.Lclear_avx:
	mov	\$0xefffe7ff,%eax	# ~(1<<28|1<<12|1<<11)
	and	%eax,%r9d		# clear AVX, FMA and AMD XOP bits
	andl	\$0xffffffdf,8(%rdi)	# clear AVX2, ~(1<<5)
.Ldone:
	movl	%r9d,4(%rdi)
	movl	%r10d,0(%rdi)
	mov	%r8,%rbx		# restore %rbx
	ret
.size	OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid

___

# Closing the pipe flushes the generated code and waits for the translator.
# A write error or a non-zero translator exit status is fatal rather than
# silently producing truncated or missing output.
close STDOUT or die "error closing STDOUT: $! (exit status $?)";