Add util/read_symbols.go

- Add util/read_symbols.go to read exported symbols from an AR
  file for use with the symbol prefixing feature
- Move util/fipstools/fipscommon/ar.go -> util/ar/ar.go
- util/ar/ar.go: Support BSD-style AR files

Change-Id: I171b3b952e69c4b87ac04751b7dba3ea9bc2504e
Reviewed-on: https://boringssl-review.googlesource.com/32024
Reviewed-by: David Benjamin <davidben@google.com>
diff --git a/BUILDING.md b/BUILDING.md
index ab9b71d..01f5480 100644
--- a/BUILDING.md
+++ b/BUILDING.md
@@ -128,7 +128,11 @@
 `/path/to/symbols.txt`.
 
 It is currently the caller's responsibility to create and maintain the list of
-symbols to be prefixed.
+symbols to be prefixed. Alternatively, `util/read_symbols.go` reads the list of
+exported symbols from a `.a` file, and can be used in a build script to generate
+the symbol list on the fly (by building without prefixing, using
+`read_symbols.go` to construct a symbol list, and then building again with
+prefixing).
 
 This mechanism is under development and may change over time. Please contact the
 BoringSSL maintainers if making use of it.
diff --git a/util/fipstools/fipscommon/ar.go b/util/ar/ar.go
similarity index 79%
rename from util/fipstools/fipscommon/ar.go
rename to util/ar/ar.go
index 85b378d..f5dee62 100644
--- a/util/fipstools/fipscommon/ar.go
+++ b/util/ar/ar.go
@@ -14,11 +14,12 @@
 
 // ar.go contains functions for parsing .a archive files.
 
-package fipscommon
+package ar
 
 import (
 	"bytes"
 	"errors"
+	"fmt"
 	"io"
 	"strconv"
 	"strings"
@@ -113,6 +114,33 @@
 			name = strings.TrimRight(name, "/")
 		}
 
+		// Post-processing for BSD:
+		// https://en.wikipedia.org/wiki/Ar_(Unix)#BSD_variant
+		//
+		// If the name is of the form #1/XXX, XXX identifies the length of the
+		// name, and the name itself is stored as a prefix of the data, possibly
+		// null-padded.
+
+		var namelen uint
+		n, err := fmt.Sscanf(name, "#1/%d", &namelen)
+		if err == nil && n == 1 && len(contents) >= int(namelen) {
+			name = string(contents[:namelen])
+			contents = contents[namelen:]
+
+			// Names can be null padded; find the first null (if any). Note that
+			// this also handles the case of a null followed by non-null
+			// characters. It's not clear whether those can ever show up in
+			// practice, but we might as well handle them in case they can show
+			// up.
+			var null int
+			for ; null < len(name); null++ {
+				if name[null] == 0 {
+					break
+				}
+			}
+			name = name[:null]
+		}
+
 		ret[name] = contents
 	}
 
diff --git a/util/fipstools/delocate/delocate.go b/util/fipstools/delocate/delocate.go
index a8c4fd0..4734285 100644
--- a/util/fipstools/delocate/delocate.go
+++ b/util/fipstools/delocate/delocate.go
@@ -26,6 +26,7 @@
 	"strconv"
 	"strings"
 
+	"boringssl.googlesource.com/boringssl/util/ar"
 	"boringssl.googlesource.com/boringssl/util/fipstools/fipscommon"
 )
 
@@ -1425,7 +1426,7 @@
 			}
 			defer arFile.Close()
 
-			ar, err := fipscommon.ParseAR(arFile)
+			ar, err := ar.ParseAR(arFile)
 			if err != nil {
 				return err
 			}
diff --git a/util/fipstools/inject-hash/inject-hash.go b/util/fipstools/inject-hash/inject-hash.go
index 14418a3..d079654 100644
--- a/util/fipstools/inject-hash/inject-hash.go
+++ b/util/fipstools/inject-hash/inject-hash.go
@@ -29,6 +29,7 @@
 	"io/ioutil"
 	"os"
 
+	"boringssl.googlesource.com/boringssl/util/ar"
 	"boringssl.googlesource.com/boringssl/util/fipstools/fipscommon"
 )
 
@@ -45,7 +46,7 @@
 		}
 		defer arFile.Close()
 
-		ar, err := fipscommon.ParseAR(arFile)
+		ar, err := ar.ParseAR(arFile)
 		if err != nil {
 			return err
 		}
diff --git a/util/read_symbols.go b/util/read_symbols.go
new file mode 100644
index 0000000..5e3a177
--- /dev/null
+++ b/util/read_symbols.go
@@ -0,0 +1,170 @@
+// Copyright (c) 2018, Google Inc.
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+// read_symbols.go scans one or more .a files and, for each object file (.o
+// member) they contain, reads the exported, defined symbols of that object. It
+// prints the sorted, de-duplicated symbol names, omitting C++ mangled names.
+package main
+
+import (
+	"bytes"
+	"debug/elf"
+	"debug/macho"
+	"flag"
+	"fmt"
+	"os"
+	"runtime"
+	"sort"
+	"strings"
+
+	"boringssl.googlesource.com/boringssl/util/ar"
+)
+
+// Values accepted by the -obj-file-format flag.
+const (
+	ObjFileFormatELF   = "elf"
+	ObjFileFormatMachO = "macho"
+)
+
+var (
+	// outFlag names the file that receives the symbol list; the default "-"
+	// selects standard output.
+	outFlag = flag.String("out", "-", "File to write output symbols")
+	// objFileFormat selects the parser applied to each archive member. Its
+	// default is derived from the GOOS this tool is running on.
+	objFileFormat = flag.String("obj-file-format", defaultObjFileFormat(runtime.GOOS), "Object file format to expect (options are elf, macho)")
+)
+
+// defaultObjFileFormat returns the object file format to assume for the given
+// GOOS value: ELF for "linux" and Mach-O for "darwin". For any other value it
+// returns the placeholder "unsupported" instead of panicking, so that the tool
+// still works on such a host when -obj-file-format is given explicitly. If the
+// placeholder is left in place, listSymbols panics on the first object file.
+func defaultObjFileFormat(goos string) string {
+	if goos == "linux" {
+		return ObjFileFormatELF
+	}
+	if goos == "darwin" {
+		return ObjFileFormatMachO
+	}
+	return "unsupported"
+}
+
+// main reads every archive named on the command line, gathers the exported
+// symbols of each member whose name ends in ".o", and writes the sorted,
+// de-duplicated names, one per line, to the -out file ("-" selects stdout).
+// C++ mangled names are omitted. A missing archive argument prints usage and
+// exits with status 1; every other failure panics via nilOrPanic.
+func main() {
+	flag.Parse()
+	if flag.NArg() < 1 {
+		fmt.Fprintf(os.Stderr, "Usage: %s [-out OUT] [-obj-file-format FORMAT] ARCHIVE_FILE [ARCHIVE_FILE [...]]\n", os.Args[0])
+		os.Exit(1)
+	}
+	archiveFiles := flag.Args()
+
+	out := os.Stdout
+	if *outFlag != "-" {
+		var err error
+		out, err = os.Create(*outFlag)
+		nilOrPanic(err, "failed to open output file")
+		defer out.Close()
+	}
+
+	var symbols []string
+	// Only add first instance of any symbol; keep track of them in this map.
+	added := make(map[string]bool)
+	for _, archive := range archiveFiles {
+		f, err := os.Open(archive)
+		nilOrPanic(err, "failed to open archive file %s", archive)
+		objectFiles, err := ar.ParseAR(f)
+		// ParseAR hands back the member contents as byte slices, so the file
+		// is no longer needed; close it here rather than leaking one handle
+		// per archive.
+		f.Close()
+		nilOrPanic(err, "failed to read archive file %s", archive)
+
+		for name, contents := range objectFiles {
+			if !strings.HasSuffix(name, ".o") {
+				continue
+			}
+			for _, s := range listSymbols(name, contents) {
+				if !added[s] {
+					added[s] = true
+					symbols = append(symbols, s)
+				}
+			}
+		}
+	}
+	sort.Strings(symbols)
+	for _, s := range symbols {
+		// Filter out C++ mangled names. The prefix is "_Z" for both formats:
+		// listSymbolsMachO has already removed the extra leading underscore
+		// that Mach-O adds, so testing for "__Z" would never match.
+		if !strings.HasPrefix(s, "_Z") {
+			fmt.Fprintln(out, s)
+		}
+	}
+}
+
+// listSymbols returns the exported symbols of one object file, parsing
+// contents as the format selected by -obj-file-format. name is passed through
+// to the format-specific parser. Any format value other than the two
+// ObjFileFormat constants causes a panic.
+func listSymbols(name string, contents []byte) []string {
+	if *objFileFormat == ObjFileFormatELF {
+		return listSymbolsELF(name, contents)
+	}
+	if *objFileFormat == ObjFileFormatMachO {
+		return listSymbolsMachO(name, contents)
+	}
+	panic(fmt.Errorf("unsupported object file format %v", *objFileFormat))
+}
+
+// listSymbolsELF returns the names of the non-local, defined symbols in the
+// ELF object held in contents. An object that has no symbol table yields nil,
+// mirroring listSymbolsMachO. Any other parse failure panics with a message
+// that includes name.
+func listSymbolsELF(name string, contents []byte) []string {
+	f, err := elf.NewFile(bytes.NewReader(contents))
+	nilOrPanic(err, "failed to parse ELF file %s", name)
+	syms, err := f.Symbols()
+	if err == elf.ErrNoSymbols {
+		// A missing symbol table is not a malformed file; the object simply
+		// exports nothing.
+		return nil
+	}
+	nilOrPanic(err, "failed to read symbol names from ELF file %s", name)
+
+	var names []string
+	for _, sym := range syms {
+		// Only include exported, defined symbols
+		if elf.ST_BIND(sym.Info) != elf.STB_LOCAL && sym.Section != elf.SHN_UNDEF {
+			names = append(names, sym.Name)
+		}
+	}
+	return names
+}
+
+// listSymbolsMachO returns the names of the external, defined symbols in the
+// Mach-O object held in contents, each with its leading underscore removed.
+// An object with no symbol table yields nil. It panics, with a message that
+// includes name, if contents cannot be parsed, and also if a selected symbol
+// does not begin with an underscore.
+func listSymbolsMachO(name string, contents []byte) []string {
+	// Bits of the n_type field. Source:
+	// https://opensource.apple.com/source/xnu/xnu-3789.51.2/EXTERNAL_HEADERS/mach-o/nlist.h.auto.html
+	const (
+		nExt  uint8 = 0x01 // N_EXT: external symbol bit, set for external symbols
+		nType uint8 = 0x0e // N_TYPE: mask for the type bits
+		nUndf uint8 = 0x0  // N_UNDF: undefined, n_sect == NO_SECT
+	)
+
+	f, err := macho.NewFile(bytes.NewReader(contents))
+	nilOrPanic(err, "failed to parse Mach-O file %s", name)
+	if f.Symtab == nil {
+		return nil
+	}
+
+	var names []string
+	for _, sym := range f.Symtab.Syms {
+		// Only include exported, defined symbols.
+		external := sym.Type&nExt != 0
+		defined := sym.Type&nType != nUndf
+		if !external || !defined {
+			continue
+		}
+		if !strings.HasPrefix(sym.Name, "_") {
+			panic(fmt.Errorf("unexpected symbol without underscore prefix: %v", sym.Name))
+		}
+		names = append(names, sym.Name[1:])
+	}
+	return names
+}
+
+// nilOrPanic returns immediately when err is nil. Otherwise it panics with an
+// error built from the format string f with ": %v" appended, formatted with
+// args followed by err.
+func nilOrPanic(err error, f string, args ...interface{}) {
+	if err == nil {
+		return
+	}
+	format := f + ": %v"
+	values := append(args, err)
+	panic(fmt.Errorf(format, values...))
+}