x86_64 assembly pack: improve masm support.

(Imported from upstream's 371feee876dd8b58531cb6e50fe79262db8e4ed7)

Change-Id: Id3b5ece6b5e5f0565060d5e598ea265d64dac9df
diff --git a/crypto/bn/asm/rsaz-x86_64.pl b/crypto/bn/asm/rsaz-x86_64.pl
index cd651f7..3a4f0c5 100644
--- a/crypto/bn/asm/rsaz-x86_64.pl
+++ b/crypto/bn/asm/rsaz-x86_64.pl
@@ -110,7 +110,7 @@
 
 if (!$addx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
 	    `ml64 2>&1` =~ /Version ([0-9]+)\./) {
-	$addx = ($1>=11);
+	$addx = ($1>=12);
 }
 
 if (!$addx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9])\.([0-9]+)/) {
diff --git a/crypto/bn/asm/x86_64-mont.pl b/crypto/bn/asm/x86_64-mont.pl
index 29c0f9c..39476ab 100644
--- a/crypto/bn/asm/x86_64-mont.pl
+++ b/crypto/bn/asm/x86_64-mont.pl
@@ -65,7 +65,7 @@
 
 if (!$addx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
 	    `ml64 2>&1` =~ /Version ([0-9]+)\./) {
-	$addx = ($1>=11);
+	$addx = ($1>=12);
 }
 
 # int bn_mul_mont(
@@ -741,9 +741,11 @@
 my @A1=("%r12","%r13");
 my ($a0,$a1,$ai)=("%r14","%r15","%rbx");
 
+$code.=<<___	if ($addx);
+.extern	bn_sqrx8x_internal		# see x86_64-mont5 module
+___
 $code.=<<___;
 .extern	bn_sqr8x_internal		# see x86_64-mont5 module
-.extern	bn_sqrx8x_internal		# see x86_64-mont5 module
 
 .type	bn_sqr8x_mont,\@function,6
 .align	32
diff --git a/crypto/bn/asm/x86_64-mont5.pl b/crypto/bn/asm/x86_64-mont5.pl
index 85386c1..5037cab 100644
--- a/crypto/bn/asm/x86_64-mont5.pl
+++ b/crypto/bn/asm/x86_64-mont5.pl
@@ -50,7 +50,7 @@
 
 if (!$addx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
 	    `ml64 2>&1` =~ /Version ([0-9]+)\./) {
-	$addx = ($1>=11);
+	$addx = ($1>=12);
 }
 
 # int bn_mul_mont_gather5(
diff --git a/crypto/modes/asm/aesni-gcm-x86_64.pl b/crypto/modes/asm/aesni-gcm-x86_64.pl
index 5fb437c..cfc856c 100644
--- a/crypto/modes/asm/aesni-gcm-x86_64.pl
+++ b/crypto/modes/asm/aesni-gcm-x86_64.pl
@@ -92,7 +92,7 @@
 
 .align	32
 .Loop6x:
-	add		\$100663296,$counter
+	add		\$`6<<24`,$counter
 	jc		.Lhandle_ctr32		# discard $inout[1-5]?
 	vmovdqu		0x00-0x20($Xip),$Hkey	# $Hkey^1
 	  vpaddb	$T2,$inout5,$T1		# next counter value
@@ -520,7 +520,7 @@
 	vmovups		0x10-0x80($key),$rndkey
 	lea		0x20-0x80($key),%r12
 	vpxor		$Z0,$T1,$inout0
-	add		\$100663296,$counter
+	add		\$`6<<24`,$counter
 	jc		.Lhandle_ctr32_2
 	vpaddb		$T2,$T1,$inout1
 	vpaddb		$T2,$inout1,$inout2
diff --git a/crypto/perlasm/x86_64-xlate.pl b/crypto/perlasm/x86_64-xlate.pl
index 24ff1b4..686809b 100755
--- a/crypto/perlasm/x86_64-xlate.pl
+++ b/crypto/perlasm/x86_64-xlate.pl
@@ -112,7 +112,6 @@
 	    $line = substr($line,@+[0]); $line =~ s/^\s+//;
 
 	    undef $self->{sz};
-	    undef $self->{arg_sz};
 	    if ($self->{op} =~ /^(movz)x?([bw]).*/) {	# movz is pain...
 		$self->{op} = $1;
 		$self->{sz} = $2;
@@ -120,12 +119,6 @@
 		$self->{sz} = "";
 	    } elsif ($self->{op} =~ /^p/ && $' !~ /^(ush|op|insrw)/) { # SSEn
 		$self->{sz} = "";
-	    } elsif ($self->{op} eq "vpbroadcastq") {
-		$self->{arg_sz} = "q";
-	    } elsif ($self->{op} eq "vpbroadcastb") {
-		$self->{arg_sz} = "b";
-	    } elsif ($self->{op} =~ /^vinserti128/) {
-		$self->{arg_sz} = "x";
 	    } elsif ($self->{op} =~ /^v/) { # VEX
 		$self->{sz} = "";
 	    } elsif ($self->{op} =~ /movq/ && $line =~ /%xmm/) {
@@ -143,10 +136,6 @@
 	$self->{sz} = $sz if (defined($sz) && !defined($self->{sz}));
 	$self->{sz};
     }
-    sub arg_size {
-	my $self = shift;
-	$self->{arg_sz};
-    }
     sub out {
 	my $self = shift;
 	if ($gas) {
@@ -281,15 +270,20 @@
 		sprintf "%s%s(%%%s)",	$self->{asterisk},$self->{label},$self->{base};
 	    }
 	} else {
-	    %szmap = (	b=>"BYTE$PTR", w=>"WORD$PTR", l=>"DWORD$PTR",
-	    		q=>"QWORD$PTR",o=>"OWORD$PTR",x=>"XMMWORD$PTR",
-			y=>"YMMWORD$PTR" );
+	    %szmap = (	b=>"BYTE$PTR",  w=>"WORD$PTR",
+			l=>"DWORD$PTR", d=>"DWORD$PTR",
+	    		q=>"QWORD$PTR", o=>"OWORD$PTR",
+			x=>"XMMWORD$PTR", y=>"YMMWORD$PTR", z=>"ZMMWORD$PTR" );
 
 	    $self->{label} =~ s/\./\$/g;
 	    $self->{label} =~ s/(?<![\w\$\.])0x([0-9a-f]+)/0$1h/ig;
 	    $self->{label} = "($self->{label})" if ($self->{label} =~ /[\*\+\-\/]/);
-	    $sz="q" if ($self->{asterisk} || opcode->mnemonic() =~ /^v?movq$/);
-	    $sz="l" if (opcode->mnemonic() =~ /^v?movd$/);
+
+	    ($self->{asterisk})					&& ($sz="q") ||
+	    (opcode->mnemonic() =~ /^v?mov([qd])$/)		&& ($sz=$1)  ||
+	    (opcode->mnemonic() =~ /^v?pinsr([qdwb])$/)		&& ($sz=$1)  ||
+	    (opcode->mnemonic() =~ /^vpbroadcast([qdwb])$/)	&& ($sz=$1)  ||
+	    (opcode->mnemonic() =~ /^vinsert[fi]128$/)		&& ($sz="x");
 
 	    if (defined($self->{index})) {
 		sprintf "%s[%s%s*%d%s]",$szmap{$sz},
@@ -547,7 +541,7 @@
 					$v="$current_segment\tENDS\n" if ($current_segment);
 					$current_segment = ".text\$";
 					$v.="$current_segment\tSEGMENT ";
-					$v.=$masm>=$masmref ? "ALIGN(64)" : "PAGE";
+					$v.=$masm>=$masmref ? "ALIGN(256)" : "PAGE";
 					$v.=" 'CODE'";
 				    }
 				    $self->{value} = $v;
@@ -789,6 +783,19 @@
     }
 };
 
+my $rdseed = sub {	# hand-encode "rdseed %reg": returns the opcode byte list, or () if the operand is not an %e../%r.. register
+    if (shift =~ /%[er](\w+)/) {
+      my @opcode=();
+      my $dst=$1;	# register name with the %e/%r prefix stripped, e.g. "ax" or "8"
+	if ($dst !~ /[0-9]+/) { $dst = $regrm{"%e$dst"}; }	# named register: map it to its number via %regrm
+	rex(\@opcode,0,$dst,8);	# pass $dst, not $1: for numeric registers the /[0-9]+/ match above succeeds and resets $1, so rex() (defined elsewhere) would not see the register number
+	push @opcode,0x0f,0xc7,0xf8|($dst&7);	# 0F C7 /7, low three bits of the register number in ModRM.rm
+	@opcode;
+    } else {
+	();
+    }
+};
+
 sub rxb {
  local *opcode=shift;
  my ($dst,$src1,$src2,$rxb)=@_;
@@ -832,6 +839,8 @@
     print <<___;
 default	rel
 %define XMMWORD
+%define YMMWORD
+%define ZMMWORD
 ___
 } elsif ($masm) {
     print <<___;
@@ -886,7 +895,6 @@
 	if ($#args>=0) {
 	    my $insn;
 	    my $sz=opcode->size();
-	    my $arg_sz=opcode->arg_size();
 
 	    if ($gas) {
 		$insn = $opcode->out($#args>=1?$args[$#args]->size():$sz);
@@ -899,18 +907,12 @@
 		    # $insn.=$sz compensates for movq, pinsrw, ...
 		    if ($arg =~ /^xmm[0-9]+$/) { $insn.=$sz; $sz="x" if(!$sz); last; }
 		    if ($arg =~ /^ymm[0-9]+$/) { $insn.=$sz; $sz="y" if(!$sz); last; }
+		    if ($arg =~ /^zmm[0-9]+$/) { $insn.=$sz; $sz="z" if(!$sz); last; }
 		    if ($arg =~ /^mm[0-9]+$/)  { $insn.=$sz; $sz="q" if(!$sz); last; }
 		}
-		$sz=$arg_sz if($arg_sz);
 		@args = reverse(@args);
 		undef $sz if ($nasm && $opcode->mnemonic() eq "lea");
-
-		if ($insn eq "movq" && $#args == 1 && $args[0]->out($sz) eq "xmm0" && $args[1]->out($sz) eq "rax") {
-		    # I have no clue why MASM can't parse this instruction.
-		    printf "DB 66h, 48h, 0fh, 6eh, 0c0h";
-		} else {
-		    printf "\t%s\t%s",$insn,join(",",map($_->out($sz),@args));
-		}
+		printf "\t%s\t%s",$insn,join(",",map($_->out($sz),@args));
 	    }
 	} else {
 	    printf "\t%s",$opcode->out();