blob: 37fc5d909d34506c9baa59c79f8c1773d089f91d [file] [log] [blame] [edit]
// Copyright (c) 2025 The BoringSSL Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build ignore
// extract_identifiers_clang_json parses the BoringSSL public includes and (for now)
// outputs a report of all identifiers defined therein. Sample usage:
//
// for f in include/openssl/*.h; do echo "#include <${f#include/}>"; done |\
// clang++ -x c++ -std=c++17 -Iinclude -fsyntax-only -Xclang -ast-dump=json - |\
// go run util/extract_identifiers_clang_json.go > extract_identifiers.txt
//
// Note that right now the output of this tool is for human use only.
// The tool will likely be changed further for the purpose of symbol prefixing
// and auditing thereof.
package main
import (
"encoding/json"
"flag"
"fmt"
"log"
"os"
"regexp"
"strings"
)
// Command-line flags. They are registered at package initialization and hold
// their final values only after flag.Parse has run in main.
var (
// dumpTree logs each visited node, filtered by the walker's inBoringSSL state.
dumpTree = flag.Bool("dump_tree", false, "dump syntax tree while processing")
// dumpFullTree logs each visited node regardless of where it comes from.
dumpFullTree = flag.Bool("dump_full_tree", false, "dump syntax tree while processing including system headers")
// keepGoing makes visit log an error and carry on instead of returning it.
keepGoing = flag.Bool("keep_going", false, "continue even after errors")
// language is the language a new walker starts out with; a LinkageSpecDecl
// node overrides it for the nodes below it.
language = flag.String("language", "C", "language to consider the source to be")
)
// node is a node from the Clang AST dump.
//
// It declares the attributes this tool reads. None of the struct tags names a
// JSON key, so encoding/json matches keys against the Go field names
// (an exact match is preferred, a case-insensitive match is accepted).
type node struct {
// Kind selects the handling in walker.visit.
Kind string
// Loc is the node's source location; walker.updateInBoringSSL uses it to
// decide whether the node is BoringSSL code.
Loc loc
// Inner holds the child nodes, in dump order.
Inner []*node `json:",omitempty"`
// Decl is a nested declaration reference; visit consults its Name when
// detecting `typedef struct X X;`.
Decl *node
// Node fields that may or may not matter depending on `Kind`.
CompleteDefinition bool `json:",omitempty"` // Read for CXXRecordDecl/RecordDecl.
IsImplicit bool `json:",omitempty"` // Implicit nodes are skipped together with their children.
Language string `json:",omitempty"` // Read for LinkageSpecDecl.
Name string `json:",omitempty"` // Identifier that gets reported.
PreviousDecl string `json:",omitempty"` // Non-empty marks a redeclaration of a function or variable.
StorageClass string `json:",omitempty"` // Interpreted by node.storage.
TagUsed string `json:",omitempty"` // Used as the tag when reporting a record.
}
// loc is a location from the Clang AST dump.
//
// A loc either carries a file path directly or wraps nested spelling and/or
// expansion locations; see file and decompress for how they are resolved.
type loc struct {
// File is the file path. It may be empty in the raw dump; decompress fills
// it in from the most recently seen path.
File string `json:",omitempty"`
// SpellingLoc is the nested spelling location, if present.
SpellingLoc *loc `json:",omitempty"`
// ExpansionLoc is the nested expansion location, if present. file prefers
// it over SpellingLoc.
ExpansionLoc *loc `json:",omitempty"`
}
// file returns the file path a loc resolves to.
//
// Nested locations take precedence over the loc's own File field: the
// expansion location is followed if present, otherwise the spelling location.
// The File of the innermost loc reached this way is returned.
func (l loc) file() string {
	cur := &l
	for {
		next := cur.ExpansionLoc
		if next == nil {
			next = cur.SpellingLoc
		}
		if next == nil {
			return cur.File
		}
		cur = next
	}
}
// decompress undoes the filename field compression from
// JSONNodeDumper::writeSourceLocation and JSONNodeDumper::writeBareSourceLocation.
//
// lastFile carries the most recently seen file path between calls. A nil
// receiver is a no-op. Nested spelling and expansion locations are processed
// first, in that order; a loc that has either of them is left untouched
// otherwise. A plain loc with an empty File inherits *lastFile, and a plain loc
// with a File becomes the new *lastFile.
func (l *loc) decompress(lastFile *string) {
	if l == nil {
		return
	}
	l.SpellingLoc.decompress(lastFile)
	l.ExpansionLoc.decompress(lastFile)
	switch {
	case l.SpellingLoc != nil, l.ExpansionLoc != nil:
		// Composite location: only the nested locations carry file names.
	case l.File != "":
		*lastFile = l.File
	default:
		l.File = *lastFile
	}
}
// decompressLocsInternal is the recursive worker behind decompressLocs.
//
// It decompresses the node's own Loc and then each child subtree in order,
// threading the most recently seen file path through lastFile.
func (n *node) decompressLocsInternal(lastFile *string) {
	n.Loc.decompress(lastFile)
	for i := range n.Inner {
		n.Inner[i].decompressLocsInternal(lastFile)
	}
}
// decompressLocs decompresses the Loc field of n and of every node below it.
//
// Should be called right after parsing. The walk starts with no file seen yet.
func (n *node) decompressLocs() {
	lastFile := new(string)
	n.decompressLocsInternal(lastFile)
}
// storage represents the storage class of a node.
type storage int
// Storage classes as reported by node.storage.
const (
// noStorage is the zero value. It is passed for identifiers without linkage
// and returned by node.storage alongside an error.
noStorage storage = iota
// externStorage is reported for StorageClass "" and "extern".
externStorage
// staticStorage is reported for StorageClass "static".
staticStorage
)
// storage maps the node's StorageClass attribute to a storage value.
//
// A missing storage class is treated like "extern". Any class other than
// "", "extern" or "static" yields noStorage and an error naming the class.
func (n node) storage() (storage, error) {
	if n.StorageClass == "static" {
		return staticStorage, nil
	}
	if n.StorageClass == "" || n.StorageClass == "extern" {
		return externStorage, nil
	}
	return noStorage, fmt.Errorf("no handling for storage class %q", n.StorageClass)
}
// namespacing indicates how the identifier respects namespaces.
type namespacing int
// Namespacing modes; collectIdentifier uses them to choose between the bare
// name and the namespace-qualified name of an identifier.
const (
alwaysGlobal namespacing = iota // Never in namespace (such as preprocessor macros).
globalIfC // Respects namespace unless in extern "C" (such as functions).
alwaysNamespaced // Always respects namespace (such as types).
)
// linking indicates how the identifier responds to extern "C" or similar.
type linking int
// Linking modes; collectIdentifier uses them to decide whether the reported
// declaration carries an `extern "..."` or `static` prefix.
const (
neverLinked linking = iota // Ignores linkage information (such as types).
respectsLinkage // Respects linkage information (such as functions).
)
// walker is data that is transported to inner nodes while parsing.
//
// visit takes its walker by value, so changes made to the plain fields while
// handling a node are seen by that node's descendants only. State that must
// outlive a subtree lives behind the embedded *walkerStatic pointer, which all
// copies share.
type walker struct {
*walkerStatic // Data that can be mutated even by downstream nodes.
inBoringSSL bool // Whether the code originates from BoringSSL.
depth int // Nesting depth (for -dump_tree output).
namespace []string // C++ namespace sequence the node is in.
anonNamespace bool // Whether the node is in a C++ anonymous namespace.
language string // Can be "C" or "C++".
record bool // Whether the current node is part of a record.
}
// walkerStatic is data that is transported in reading direction while parsing.
//
// It is shared by pointer between all copies of a walker.
type walkerStatic struct {
// seen maps each reported identifier to the declaration string printed for
// it; collectIdentifier uses it to suppress repeats and detect conflicts.
seen map[string]string // All identifiers seen so far.
}
// newWalker returns a walker for the root of a tree: an empty set of seen
// identifiers, the language taken from the -language flag, and every other
// field at its zero value.
func newWalker() walker {
	var w walker
	w.walkerStatic = &walkerStatic{seen: make(map[string]string)}
	w.language = *language
	return w
}
// boringSSLPath matches the file paths that are considered to be BoringSSL:
// files with a non-absolute path. Absolute paths usually indicate system
// header locations.
//
// The pattern requires the path to start with two word characters, so any
// non-word character in the first two characters is treated as indicating an
// absolute path. This catches "<built-in>", "/foo/bar.h" and "C:\foo\bar.h".
// Paths shorter than two characters, including the empty path, do not match.
var (
boringSSLPath = regexp.MustCompile(`^\w\w`)
)
// updateInBoringSSL sets w.inBoringSSL for a node of the given kind at the
// given location.
//
// A TranslationUnitDecl always counts as BoringSSL. For every other kind the
// file path of loc is matched against boringSSLPath. The function returns
// nothing; its only effect is the update of w.inBoringSSL.
func (w *walker) updateInBoringSSL(kind string, loc loc) {
	w.inBoringSSL = kind == "TranslationUnitDecl" || boringSSLPath.MatchString(loc.file())
}
// visit traverses a node in the AST and analyzes it for identifiers contained therein.
//
// The walker is received by value: any state changed while handling n
// (namespace, language, record, ...) is seen by n's descendants only, whereas
// the set of seen identifiers is shared through the embedded walkerStatic.
//
// Nodes that are not considered BoringSSL code, and implicit nodes, are skipped
// together with their children. An unknown node kind is an error. With
// -keep_going, an error is logged and visit returns nil, so the caller
// continues with the next sibling.
func (w walker) visit(n *node) (err error) {
	// Classify the node before anything else, so that the -dump_tree filter
	// below applies to the node being printed rather than to its parent.
	w.updateInBoringSSL(n.Kind, n.Loc)

	// JSON rendering of the node without its children, for dumps and errors.
	nodeWithoutChildren := *n
	nodeWithoutChildren.Inner = nil
	nodeCode, err := json.Marshal(nodeWithoutChildren)
	if err != nil {
		return err
	}
	if (*dumpTree && w.inBoringSSL) || *dumpFullTree {
		log.Printf("%*s[%s] %s: %s (%d children)",
			w.depth, "",
			strings.Join(w.namespace, "::"),
			n.Kind,
			nodeCode,
			len(n.Inner))
	}

	// Allow to ignore errors.
	defer func() {
		if *keepGoing && err != nil {
			log.Printf("ERROR: %v", err)
			err = nil
		}
	}()

	// Children are one level deeper.
	w.depth++

	if !w.inBoringSSL || n.IsImplicit {
		// If suppressed, below nodes are not interesting.
		// Also, skip any non-BoringSSL code such as system headers.
		return nil
	}

	switch n.Kind {
	// Nodes that need handling.
	case "CXXRecordDecl", "RecordDecl":
		if w.record && n.CompleteDefinition {
			return nil
		}
		if n.Name != "" {
			if err := w.collectIdentifier(n.TagUsed, alwaysNamespaced, neverLinked, noStorage, n.Name); err != nil {
				return err
			}
		}
		w.record = true
	case "EnumDecl":
		if w.record {
			return nil
		}
		if n.Name != "" {
			if err := w.collectIdentifier("enum", alwaysNamespaced, neverLinked, noStorage, n.Name); err != nil {
				return err
			}
		}
	case "EnumConstantDecl":
		if w.record {
			return nil
		}
		if err := w.collectIdentifier("enumerator", alwaysNamespaced, neverLinked, noStorage, n.Name); err != nil {
			return err
		}
		return nil // Do not recurse.
	case "FunctionDecl":
		if w.record {
			return nil
		}
		if n.PreviousDecl != "" {
			// Definition or redeclaration doesn't need to be looked at again
			// (and may have incomplete qualifiers).
			return nil
		}
		storage, err := n.storage()
		if err != nil {
			return fmt.Errorf("could not find storage class of function: %w: %s", err, nodeCode)
		}
		if err := w.collectIdentifier("function", globalIfC, respectsLinkage, storage, n.Name); err != nil {
			return err
		}
	case "LinkageSpecDecl":
		if n.Language != "" {
			w.language = n.Language
		}
	case "NamespaceDecl":
		if n.Name == "" {
			w.anonNamespace = true
		} else {
			// Copy before appending so sibling walkers never share a backing array.
			w.namespace = append(append([]string(nil), w.namespace...), n.Name)
		}
	case "TypeAliasDecl", "TypeAliasTemplateDecl":
		if w.record {
			return nil
		}
		if err := w.collectIdentifier("using", alwaysNamespaced, neverLinked, noStorage, n.Name); err != nil {
			return err
		}
	case "TypedefDecl":
		if w.record {
			return nil
		}
		if len(n.Inner) == 1 &&
			n.Inner[0].Kind == "ElaboratedType" &&
			len(n.Inner[0].Inner) == 1 &&
			n.Inner[0].Inner[0].Decl != nil &&
			n.Inner[0].Inner[0].Decl.Name == n.Name {
			// typedef struct X X;
			return nil
		}
		if err := w.collectIdentifier("typedef", alwaysNamespaced, neverLinked, noStorage, n.Name); err != nil {
			return err
		}
	case "VarDecl":
		if n.PreviousDecl != "" {
			// Definition or redeclaration doesn't need to be looked at again
			// (and may have incomplete qualifiers).
			return nil
		}
		storage, err := n.storage()
		if err != nil {
			return fmt.Errorf("could not find storage class of variable: %w: %s", err, nodeCode)
		}
		if err := w.collectIdentifier("var", globalIfC, respectsLinkage, storage, n.Name); err != nil {
			return err
		}
		return nil // Do not recurse. (Maybe should, to catch `struct ...` in variable types?)

	// Singletons that should be skipped.
	case
		"AccessSpecDecl",
		"AlignedAttr",
		"BuiltinAttr",
		"BuiltinType",
		"ConstAttr",
		"DependentNameType",
		"DeprecatedAttr",
		"EnumType",
		"FormatAttr",
		"NoThrowAttr",
		"ParmVarDecl",
		"RecordType",
		"UnresolvedUsingValueDecl",
		"UnusedAttr",
		"UsingDirectiveDecl",
		"VectorType",
		"WarnUnusedResultAttr":
		if len(n.Inner) != 0 {
			// If this ever fires, check AST to see if any of the node's children could be useful,
			// then categorize the node type into one of the following two cases.
			return fmt.Errorf("singleton node of kind %q has children: %s", n.Kind, nodeCode)
		}

	// Nodes that should be skipped including possible children.
	case
		"CXXConstructorDecl",
		"CXXConversionDecl",
		"CXXDeductionGuideDecl",
		"CXXDestructorDecl",
		"CXXMethodDecl",
		"ClassTemplatePartialSpecializationDecl",
		"ClassTemplateSpecializationDecl",
		"CompoundStmt",
		"FieldDecl",
		"FriendDecl",
		"NonTypeTemplateParmDecl",
		"StaticAssertDecl",
		"TemplateArgument",
		"TemplateTypeParmDecl",
		"VarTemplateDecl":
		return nil // Do not recurse.

	// Nodes that should just be recursed into.
	case
		"ClassTemplateDecl",
		"ConstantArrayType",
		"DecayedType",
		"ElaboratedType",
		"FunctionProtoType",
		"FunctionTemplateDecl",
		"IndirectFieldDecl",
		"ParenType",
		"PointerType",
		"QualType",
		"TemplateSpecializationType",
		"TranslationUnitDecl",
		"TypedefType":
		// Just recurse.

	default:
		return fmt.Errorf("no handling for node kind %q: %s", n.Kind, nodeCode)
	}

	// Every case that reaches this point wants its children visited;
	// cases that do not recurse have returned above.
	for _, child := range n.Inner {
		if err := w.visit(child); err != nil {
			return err
		}
	}
	return nil
}
// collectIdentifier sends an identifier to the output.
//
// The reported declaration has the form "<linkage><tag> <identifier>;":
//   - linkage is empty for neverLinked identifiers; otherwise it is
//     `extern "<language>" ` or `static `, depending on storage.
//   - identifier is the bare name or the namespace-qualified name, depending
//     on namespacing and the current language. Inside an anonymous namespace
//     the qualified name is "<anonymous>::" followed by the name.
//
// Each identifier is printed to stdout once. Seeing it again with an identical
// declaration is silently accepted; seeing it with a different declaration is
// an error, as is a respectsLinkage identifier without a storage class.
func (w walker) collectIdentifier(tag string, namespacing namespacing, linking linking, storage storage, name string) error {
	// Fully qualified name.
	fqn := "<anonymous>::" + name
	if !w.anonNamespace {
		parts := make([]string, 0, len(w.namespace)+1)
		parts = append(parts, w.namespace...)
		parts = append(parts, name)
		fqn = strings.Join(parts, "::")
	}

	// Linkage prefix.
	linkage := ""
	if linking == respectsLinkage {
		switch storage {
		case externStorage:
			linkage = fmt.Sprintf("extern %q ", w.language)
		case staticStorage:
			linkage = "static "
		default:
			return fmt.Errorf("respecting storage, but storage not set for %v", fqn)
		}
	}

	// Name under which the identifier is reported and deduplicated.
	identifier := ""
	qualified := namespacing == alwaysNamespaced || (namespacing == globalIfC && w.language != "C")
	switch {
	case qualified:
		identifier = fqn
	case namespacing == alwaysGlobal || namespacing == globalIfC:
		identifier = name
	}

	declaration := linkage + tag + " " + identifier + ";"
	if previous, ok := w.seen[identifier]; ok {
		if previous == declaration {
			return nil
		}
		return fmt.Errorf("duplicate distinct definition of %v: %v and %v", identifier, previous, declaration)
	}
	w.seen[identifier] = declaration
	fmt.Println(declaration)
	return nil
}
// Main is the main program.
//
// It reads a stream of JSON values from standard input, treats each one as the
// root node of a Clang AST dump, decompresses its locations and walks it. A
// single walker is used for all roots, so identifiers are deduplicated across
// them. The first decoding or walking error is returned.
func Main() error {
	dec := json.NewDecoder(os.Stdin)
	w := newWalker()
	for dec.More() {
		root := new(node)
		if err := dec.Decode(root); err != nil {
			return err
		}
		root.decompressLocs()
		if err := w.visit(root); err != nil {
			return err
		}
	}
	return nil
}
// main parses the command-line flags and runs Main; an error returned by Main
// is reported through log.Panicf.
func main() {
	flag.Parse()
	if err := Main(); err != nil {
		log.Panicf("error returned from Main: %v", err)
	}
}