Convert remaining Latin-1 files to UTF-8.
See upstream's 9f0b86c68bb96d49301bbd6473c8235ca05ca06b. Generated by
using upstream's script in 5a3ce86e21715a683ff0d32421ed5c6d5e84234d and
then manually throwing out the false positives. (We converted a bunch of
stuff already in 91157550061d5d794898fe47b95384a7ba5f7b9d.)
This may require some wrestling with depot_tools to land in Chromium due
to Rietveld's encoding bugs, but hopefully that will avoid future
problems; Rietveld breaks if either old or new file is Latin-1.
Change-Id: I26dcb20c7377f92a0c843ef5d74d440a82ea8ceb
Reviewed-on: https://boringssl-review.googlesource.com/5483
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/aes/asm/aes-586.pl b/crypto/aes/asm/aes-586.pl
index 07fb94c..6e8a6a8 100755
--- a/crypto/aes/asm/aes-586.pl
+++ b/crypto/aes/asm/aes-586.pl
@@ -45,7 +45,7 @@
# the undertaken effort was that it appeared that in tight IA-32
# register window little-endian flavor could achieve slightly higher
# Instruction Level Parallelism, and it indeed resulted in up to 15%
-# better performance on most recent µ-archs...
+# better performance on most recent µ-archs...
#
# Third version adds AES_cbc_encrypt implementation, which resulted in
# up to 40% performance imrovement of CBC benchmark results. 40% was
@@ -224,7 +224,7 @@
$speed_limit=512; # chunks smaller than $speed_limit are
# processed with compact routine in CBC mode
$small_footprint=1; # $small_footprint=1 code is ~5% slower [on
- # recent µ-archs], but ~5 times smaller!
+ # recent µ-archs], but ~5 times smaller!
# I favor compact code to minimize cache
# contention and in hope to "collect" 5% back
# in real-life applications...
@@ -565,7 +565,7 @@
# Performance is not actually extraordinary in comparison to pure
# x86 code. In particular encrypt performance is virtually the same.
# Decrypt performance on the other hand is 15-20% better on newer
-# µ-archs [but we're thankful for *any* improvement here], and ~50%
+# µ-archs [but we're thankful for *any* improvement here], and ~50%
# better on PIII:-) And additionally on the pros side this code
# eliminates redundant references to stack and thus relieves/
# minimizes the pressure on the memory bus.
diff --git a/crypto/modes/asm/ghash-armv4.pl b/crypto/modes/asm/ghash-armv4.pl
index 25a4e27..a0d04ce 100644
--- a/crypto/modes/asm/ghash-armv4.pl
+++ b/crypto/modes/asm/ghash-armv4.pl
@@ -45,7 +45,7 @@
# processes one byte in 8.45 cycles, A9 - in 10.2, A15 - in 7.63,
# Snapdragon S4 - in 9.33.
#
-# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software
+# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software
# Polynomial Multiplication on ARM Processors using the NEON Engine.
#
# http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf
@@ -457,12 +457,12 @@
veor $IN,$Xl @ inp^=Xi
.Lgmult_neon:
___
- &clmul64x64 ($Xl,$Hlo,"$IN#lo"); # H.lo·Xi.lo
+ &clmul64x64 ($Xl,$Hlo,"$IN#lo"); # H.lo·Xi.lo
$code.=<<___;
veor $IN#lo,$IN#lo,$IN#hi @ Karatsuba pre-processing
___
- &clmul64x64 ($Xm,$Hhl,"$IN#lo"); # (H.lo+H.hi)·(Xi.lo+Xi.hi)
- &clmul64x64 ($Xh,$Hhi,"$IN#hi"); # H.hi·Xi.hi
+ &clmul64x64 ($Xm,$Hhl,"$IN#lo"); # (H.lo+H.hi)·(Xi.lo+Xi.hi)
+ &clmul64x64 ($Xh,$Hhi,"$IN#hi"); # H.hi·Xi.hi
$code.=<<___;
veor $Xm,$Xm,$Xl @ Karatsuba post-processing
veor $Xm,$Xm,$Xh
diff --git a/crypto/modes/asm/ghash-x86.pl b/crypto/modes/asm/ghash-x86.pl
index 23a5527..0269169 100644
--- a/crypto/modes/asm/ghash-x86.pl
+++ b/crypto/modes/asm/ghash-x86.pl
@@ -358,7 +358,7 @@
# effective address calculation and finally merge of value to Z.hi.
# Reference to rem_4bit is scheduled so late that I had to >>4
# rem_4bit elements. This resulted in 20-45% procent improvement
-# on contemporary µ-archs.
+# on contemporary µ-archs.
{
my $cnt;
my $rem_4bit = "eax";
diff --git a/crypto/modes/asm/ghash-x86_64.pl b/crypto/modes/asm/ghash-x86_64.pl
index 6e656ca..5a7ce39 100644
--- a/crypto/modes/asm/ghash-x86_64.pl
+++ b/crypto/modes/asm/ghash-x86_64.pl
@@ -576,15 +576,15 @@
# experimental alternative. special thing about is that there
# no dependency between the two multiplications...
mov \$`0xE1<<1`,%eax
- mov \$0xA040608020C0E000,%r10 # ((7..0)·0xE0)&0xff
+ mov \$0xA040608020C0E000,%r10 # ((7..0)·0xE0)&0xff
mov \$0x07,%r11d
movq %rax,$T1
movq %r10,$T2
movq %r11,$T3 # borrow $T3
pand $Xi,$T3
- pshufb $T3,$T2 # ($Xi&7)·0xE0
+ pshufb $T3,$T2 # ($Xi&7)·0xE0
movq %rax,$T3
- pclmulqdq \$0x00,$Xi,$T1 # ·(0xE1<<1)
+ pclmulqdq \$0x00,$Xi,$T1 # ·(0xE1<<1)
pxor $Xi,$T2
pslldq \$15,$T2
paddd $T2,$T2 # <<(64+56+1)
@@ -657,7 +657,7 @@
je .Lskip4x
sub \$0x30,$len
- mov \$0xA040608020C0E000,%rax # ((7..0)·0xE0)&0xff
+ mov \$0xA040608020C0E000,%rax # ((7..0)·0xE0)&0xff
movdqu 0x30($Htbl),$Hkey3
movdqu 0x40($Htbl),$Hkey4
diff --git a/crypto/modes/asm/ghashv8-armx.pl b/crypto/modes/asm/ghashv8-armx.pl
index 686951f..5856c94 100644
--- a/crypto/modes/asm/ghashv8-armx.pl
+++ b/crypto/modes/asm/ghashv8-armx.pl
@@ -148,10 +148,10 @@
#endif
vext.8 $IN,$t1,$t1,#8
- vpmull.p64 $Xl,$H,$IN @ H.lo·Xi.lo
+ vpmull.p64 $Xl,$H,$IN @ H.lo·Xi.lo
veor $t1,$t1,$IN @ Karatsuba pre-processing
- vpmull2.p64 $Xh,$H,$IN @ H.hi·Xi.hi
- vpmull.p64 $Xm,$Hhl,$t1 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
+ vpmull2.p64 $Xh,$H,$IN @ H.hi·Xi.hi
+ vpmull.p64 $Xm,$Hhl,$t1 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing
veor $t2,$Xl,$Xh
@@ -239,7 +239,7 @@
#endif
vext.8 $In,$t1,$t1,#8
veor $IN,$IN,$Xl @ I[i]^=Xi
- vpmull.p64 $Xln,$H,$In @ H·Ii+1
+ vpmull.p64 $Xln,$H,$In @ H·Ii+1
veor $t1,$t1,$In @ Karatsuba pre-processing
vpmull2.p64 $Xhn,$H,$In
b .Loop_mod2x_v8
@@ -248,14 +248,14 @@
.Loop_mod2x_v8:
vext.8 $t2,$IN,$IN,#8
subs $len,$len,#32 @ is there more data?
- vpmull.p64 $Xl,$H2,$IN @ H^2.lo·Xi.lo
+ vpmull.p64 $Xl,$H2,$IN @ H^2.lo·Xi.lo
cclr $inc,lo @ is it time to zero $inc?
vpmull.p64 $Xmn,$Hhl,$t1
veor $t2,$t2,$IN @ Karatsuba pre-processing
- vpmull2.p64 $Xh,$H2,$IN @ H^2.hi·Xi.hi
+ vpmull2.p64 $Xh,$H2,$IN @ H^2.hi·Xi.hi
veor $Xl,$Xl,$Xln @ accumulate
- vpmull2.p64 $Xm,$Hhl,$t2 @ (H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
+ vpmull2.p64 $Xm,$Hhl,$t2 @ (H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
vld1.64 {$t0},[$inp],$inc @ load [rotated] I[i+2]
veor $Xh,$Xh,$Xhn
@@ -280,7 +280,7 @@
vext.8 $In,$t1,$t1,#8
vext.8 $IN,$t0,$t0,#8
veor $Xl,$Xm,$t2
- vpmull.p64 $Xln,$H,$In @ H·Ii+1
+ vpmull.p64 $Xln,$H,$In @ H·Ii+1
veor $IN,$IN,$Xh @ accumulate $IN early
vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase of reduction
@@ -304,10 +304,10 @@
veor $IN,$IN,$Xl @ inp^=Xi
veor $t1,$t0,$t2 @ $t1 is rotated inp^Xi
- vpmull.p64 $Xl,$H,$IN @ H.lo·Xi.lo
+ vpmull.p64 $Xl,$H,$IN @ H.lo·Xi.lo
veor $t1,$t1,$IN @ Karatsuba pre-processing
- vpmull2.p64 $Xh,$H,$IN @ H.hi·Xi.hi
- vpmull.p64 $Xm,$Hhl,$t1 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
+ vpmull2.p64 $Xh,$H,$IN @ H.hi·Xi.hi
+ vpmull.p64 $Xm,$Hhl,$t1 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing
veor $t2,$Xl,$Xh
diff --git a/crypto/rc4/asm/rc4-x86_64.pl b/crypto/rc4/asm/rc4-x86_64.pl
index db46242..cef6268 100644
--- a/crypto/rc4/asm/rc4-x86_64.pl
+++ b/crypto/rc4/asm/rc4-x86_64.pl
@@ -56,7 +56,7 @@
# achieves respectful 432MBps on 2.8GHz processor now. For reference.
# If executed on Xeon, current RC4_CHAR code-path is 2.7x faster than
# RC4_INT code-path. While if executed on Opteron, it's only 25%
-# slower than the RC4_INT one [meaning that if CPU µ-arch detection
+# slower than the RC4_INT one [meaning that if CPU µ-arch detection
# is not implemented, then this final RC4_CHAR code-path should be
# preferred, as it provides better *all-round* performance].
diff --git a/crypto/sha/asm/sha1-586.pl b/crypto/sha/asm/sha1-586.pl
index 4895eb3..e0b5d83 100644
--- a/crypto/sha/asm/sha1-586.pl
+++ b/crypto/sha/asm/sha1-586.pl
@@ -66,9 +66,9 @@
# switch to AVX alone improves performance by as little as 4% in
# comparison to SSSE3 code path. But below result doesn't look like
# 4% improvement... Trouble is that Sandy Bridge decodes 'ro[rl]' as
-# pair of µ-ops, and it's the additional µ-ops, two per round, that
+# pair of µ-ops, and it's the additional µ-ops, two per round, that
# make it run slower than Core2 and Westmere. But 'sh[rl]d' is decoded
-# as single µ-op by Sandy Bridge and it's replacing 'ro[rl]' with
+# as single µ-op by Sandy Bridge and it's replacing 'ro[rl]' with
# equivalent 'sh[rl]d' that is responsible for the impressive 5.1
# cycles per processed byte. But 'sh[rl]d' is not something that used
# to be fast, nor does it appear to be fast in upcoming Bulldozer
diff --git a/crypto/sha/asm/sha256-586.pl b/crypto/sha/asm/sha256-586.pl
index 6462e45..e907714 100644
--- a/crypto/sha/asm/sha256-586.pl
+++ b/crypto/sha/asm/sha256-586.pl
@@ -10,7 +10,7 @@
# SHA256 block transform for x86. September 2007.
#
# Performance improvement over compiler generated code varies from
-# 10% to 40% [see below]. Not very impressive on some µ-archs, but
+# 10% to 40% [see below]. Not very impressive on some µ-archs, but
# it's 5 times smaller and optimizies amount of writes.
#
# May 2012.
diff --git a/crypto/sha/asm/sha512-586.pl b/crypto/sha/asm/sha512-586.pl
index e96ec00..2f6a202 100644
--- a/crypto/sha/asm/sha512-586.pl
+++ b/crypto/sha/asm/sha512-586.pl
@@ -37,7 +37,7 @@
#
# IALU code-path is optimized for elder Pentiums. On vanilla Pentium
# performance improvement over compiler generated code reaches ~60%,
-# while on PIII - ~35%. On newer µ-archs improvement varies from 15%
+# while on PIII - ~35%. On newer µ-archs improvement varies from 15%
# to 50%, but it's less important as they are expected to execute SSE2
# code-path, which is commonly ~2-3x faster [than compiler generated
# code]. SSE2 code-path is as fast as original sha512-sse2.pl, even