blob: e8a4a58b43289b28c812904ddba911582e6db2c6 [file]
#!/bin/sh
#
# Copyright (c) 2025 The BoringSSL Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Audit source code for identifiers that do not match symbol prefixing
# guidelines.
#
# Note that this tool is a prototype; once prefixing public symbols has been
# implemented, a much simpler tool can be created and integrated directly into
# the build process rather than running as a separate audit with bespoke cache
# files.
#
# TODO(crbug.com/42220000): Eventually port it into the build process.
set -e
set -o pipefail
: ${workdir:=$PWD/build}
mkdir -p "${workdir}"
include_files() {
for file in "$@"; do
echo "#include <${file}>"
done
}
source_to_ast() {
# Note: for now not including any symbols defined by asm functions.
# The full story for those still needs to be figured out.
clang++ \
-DOPENSSL_NO_ASM \
-Iinclude \
-Ithird_party/benchmark/include \
-Ithird_party/googletest/googlemock/include \
-Ithird_party/googletest/googletest/include \
-fsyntax-only \
-Xclang -ast-dump=json \
"$@"
}
ast_to_identifiers() {
go run util/extract_identifiers_clang_json.go "$@"
}
c_source_to_identifiers() {
source_to_ast -x c -std=c17 "$@" | ast_to_identifiers --language=C
}
c_source_to_symbols() {
source_to_ast -x c -std=c17 "$@" | ast_to_identifiers --language=C --global_symbols_only
}
cc_source_to_identifiers() {
source_to_ast -x c++ -std=c++17 "$@" | ast_to_identifiers --language=C++
}
cc_source_to_symbols() {
source_to_ast -x c++ -std=c++17 "$@" | ast_to_identifiers --language=C++ --global_symbols_only
}
lib_sources() {
jq -r '
[.bcm, .crypto, .decrepit] |
.[] |
(.internal_hdrs + .srcs) |
.[]?
' gen/sources.json |\
perl -pe '$_ = "" if /\.inc$/m and not /\.cc\.inc$/m' |\
grep -vxF 'crypto/curve25519/curve25519_tables.h' |\
grep -vxF 'crypto/fipsmodule/ec/builtin_curves.h' |\
grep -vxF 'crypto/fipsmodule/ec/p256-nistz-table.h' |\
grep -vxF 'crypto/fipsmodule/ec/p256_table.h' |\
grep -vxF 'crypto/obj/obj_dat.h' |\
grep -vE '^third_party/.*' |\
sort -u
}
public_c_includes() {
# Note: using |hdrs| of _all_ modules. The assumption is that all
# |hdrs| are public.
jq -r '
del(.pki) |
.[] |
.hdrs |
.[]?
' gen/sources.json |\
sed -e 's,^include/,,'
}
public_cc_includes() {
# Note: using |hdrs| of _all_ modules. The assumption is that all
# |hdrs| are public.
jq -r '
.[] |
.hdrs |
.[]?
' gen/sources.json |\
sed -e 's,^include/,,'
}
set_difference() {
{
sort -u < "$1"
shift
for f in "$@"; do
cat "${f}"
cat "${f}"
done
} | sort | uniq -u
}
filter_bssl() {
# Whatever is in the bssl namespace is OK.
grep -vE ' bssl::' || true
}
filter_no_symbols() {
# Whatever is static or an enumerator never becomes part of a symbol.
grep -vE "^(static|enumerator) " || true
}
filter_expected_symbols() {
# Ignore types for now (they can become _part_ of a symbol name,
# but are unlikely to clash).
grep -vE "^(class|enum|struct|typedef|union|using) " || true
}
filter_stl() {
# These symbols are often declared to support the STL and are benign,
# as they have C++ linkage and very specific arguments and thus
# non-conflicting mangled names.
grep -vE 'extern "C\+\+" inline function (begin|end);' || true
}
filter_enum() {
# Some enums are declared by public headers only in C++.
# This is intended.
grep -vE "^enum " || true
}
filter_difference() {
filter_bssl |\
filter_no_symbols |\
filter_expected_symbols |\
filter_stl
}
fix_c_cc_include_deltas() {
# OPENSSL_INLINE behaves differently in C and C++.
sed -e 's,^static inline ,extern "C" inline ,g' |\
filter_bssl |\
filter_stl |\
filter_enum |\
sort -u
}
filter_global_symbols() {
# Some symbols need to remain in global scope:
# - begin/end: have bssl-owned template args.
# - timeval: forward declared by us.
# One symbol is "annoying":
# - PKCS7: names an anonymous struct type, and is also used in macro
# concatenation. Should later just give it a struct tag so the
# typedef stops being annoying.
grep -vE '^(begin|end|timeval)$' |\
grep -vxF 'PKCS7'
}
echo >&2 'Indexing C++ includes...'
include_files $(public_cc_includes) | cc_source_to_identifiers - > "${workdir}"/include.cc.ids
include_files $(public_cc_includes) | cc_source_to_symbols - > "${workdir}"/include.cc.syms
echo >&2 'Indexing C includes...'
include_files $(public_c_includes) | c_source_to_identifiers - > "${workdir}"/include.c.ids
include_files $(public_c_includes) | c_source_to_symbols - > "${workdir}"/include.c.syms
echo >&2 'Indexing C includes as C++...'
include_files $(public_c_includes) | cc_source_to_identifiers - > "${workdir}"/include.c_as_cc.ids
include_files $(public_c_includes) | cc_source_to_symbols - > "${workdir}"/include.c_as_cc.syms
echo >&2 'Merging symbol lists...'
cat "${workdir}"/include.c.syms "${workdir}"/include.cc.syms "${workdir}"/include.c_as_cc.syms |\
filter_global_symbols |\
sort -u > "${workdir}"/include.syms
# Check that the headers behave the same if included by C and C++ files, other
# than for expected diffs.
echo >&2 'Comparing C includes across including language...'
fix_c_cc_include_deltas < "${workdir}"/include.c.ids > "${workdir}"/include.c.common.ids
fix_c_cc_include_deltas < "${workdir}"/include.c_as_cc.ids > "${workdir}"/include.c_as_cc.common.ids
diff -u "${workdir}"/include.c.common.ids "${workdir}"/include.c_as_cc.common.ids
# Check that no source file defines any public symbols that are not in the
# public headers, namespaced or otherwise OK'd.
max_jobs=16
set --
wait_jobs() {
wait
for f in "$@"; do
[ -f "${f}" ]
done
}
for file in $(lib_sources); do
if ! [ -f "${workdir}/${file}.ids" ]; then
echo >&2 "Indexing ${file} (new)..."
elif [ "${file}" -nt ${workdir}/"${file}.ids" ]; then
echo >&2 "Indexing ${file} (changed)..."
rm -f "${workdir}/${file}.ids"
else
perl -nE 'm!// (\S+):! and say $1' "${workdir}/${file}.ids" |\
sort -u |\
while read -r dep; do
if [ "${dep}" -nt "${workdir}/${file}.ids" ]; then
echo >&2 "Indexing ${file} (dependency ${dep} changed)..."
rm -f "${workdir}/${file}.ids"
break
fi
done
if [ -f "${workdir}/${file}.ids" ]; then
continue
fi
fi
# NOTE: It'd be nice to only recompute ${file}.ids if ${file} _or_ one
# of its dependencies changed.
{
mkdir -p "${workdir}/${file%/*}"
case "$file" in
*.c)
c_source_to_identifiers "${file}"
;;
**)
# This includes headers too.
cc_source_to_identifiers "${file}"
;;
esac > "${workdir}/${file}.ids.new"
mv "${workdir}/${file}.ids.new" "${workdir}/${file}.ids"
} &
set -- "$@" "${workdir}/${file}.ids"
if [ $# -ge ${max_jobs} ]; then
wait_jobs "$@"
set --
fi
done
wait_jobs "$@"
echo >&2 'Comparing symbols...'
for file in $(lib_sources); do
set_difference "${workdir}/${file}.ids" "${workdir}"/include.cc.ids | filter_difference
done | sort -u | { ! grep .; }