tflite编译

安装bazel

从apt安装

1.添加配置

sudo apt install apt-transport-https curl gnupg -y
curl -fsSL https://bazel.build/bazel-release.pub.gpg | gpg --dearmor >bazel-archive-keyring.gpg
sudo mv bazel-archive-keyring.gpg /usr/share/keyrings
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/bazel-archive-keyring.gpg] https://storage.googleapis.com/bazel-apt stable jdk1.8" | sudo tee /etc/apt/sources.list.d/bazel.list

2.安装(注意:Bazel 版本需与所编译的 TensorFlow 源码根目录下 .bazelversion 文件指定的版本一致,下面的 4.2.1 仅为示例,请按实际版本替换)

sudo apt update && sudo apt install bazel=4.2.1

从源码安装

1.安装依赖

sudo apt install pkg-config zip g++ zlib1g-dev unzip python

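2.下载源码(注意:按 Bazel 官方文档,在没有现成 Bazel 的机器上从零引导编译时,必须使用发布页提供的 bazel-<version>-dist.zip 分发包运行 ./compile.sh,不能直接使用 GitHub 克隆的源码树;另外还需要安装 JDK)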

git clone https://github.com/bazelbuild/bazel.git
cd bazel

3.编译

./compile.sh

4.安装

sudo cp output/bazel /usr/local/bin/bazel

下载预编译二进制安装

下载地址: https://github.com/bazelbuild/bazel/releases
运行方式

chmod +x bazel-version-installer-linux-x86_64.sh
./bazel-version-installer-linux-x86_64.sh --user

配置环境变量

export PATH="$PATH:$HOME/bin"

安装TensorFlow Lite

官方教程: https://www.tensorflow.org/lite/guide/build_arm?hl=zh-cn

1.下载源码

git clone https://github.com/tensorflow/tensorflow.git
cd tensorflow

或者下载指定版本

git clone -b v2.15.0 https://github.com/tensorflow/tensorflow.git

手动下载

wget https://github.com/tensorflow/tensorflow/archive/v2.15.0.zip
unzip v2.15.0.zip
cd tensorflow-2.15.0

通过bazel安装

2.配置

./configure

3.编译x86
on wsl(18.04)

bazel build --config=monolithic //tensorflow/lite:libtensorflowlite.so

经测试,报 GLIBC 版本错误(形如 `GLIBC_2.xx not found`),发现编译所用的 Ubuntu(22.04)版本与运行应用的 Ubuntu(20.04)版本不同,两者的 glibc 版本不一致,
使用相同的WSL版本或者docker版本可解决

4.编译arm64
官方

bazel build --config=elinux_aarch64 -c opt //tensorflow/lite:libtensorflowlite.so

经测试,报 GLIBC 版本错误(形如 `GLIBC_2.xx not found`)。Bazel 默认使用的是 GCC 8.3 工具链,构建的二进制文件需要 glibc 2.28 或更高版本。如果您的目标是较低版本的 glibc,则需要使用旧版 GCC 工具链。

bazel方式复制头文件和lib文件

此方式不好挂代理和不好设置交叉编译工具链,可编译出动态库
头文件

cd tensorflow/tensorflow
find ./lite -name "*.h" | tar -cf headers.tar -T -

lib文件

tensorflow/bazel-bin/tensorflow/lite/libtensorflowlite.so

flatbuffer

tensorflow/bazel-bin/external/flatbuffers/libflatbuffers.a

tensorflow/tensorflow/lite/tools/make下执行download_dependencies.sh,
或者直接下载flatbuffers压缩包,然后取出include的内容放置到cpp_tflite/include中即可。

bazel 尝试自定义工具链编译

cmake编译出来的要么是.a文件,要么是十多个so文件,bazel能编译出so文件,并且比cmake编译出的.a文件少一半,因此尝试此方式
交叉工具链位于/home/jw/gcc-linaro-7.5.0-2019.12-x86_64_aarch64-linux-gnu
参考已有的工具链修改/home/jw/.cache/bazel/_bazel_jw/e63bf23c7ad9a33bf748ef6d0ba1dfc8/external/local_config_embedded_arm

# Create the custom toolchain package inside the TensorFlow source tree.
# Both files must live in rk3308_toolchain/ so that //rk3308_toolchain:toolchain
# resolves, and the Starlark file must be named cc_config.bzl because the BUILD
# file loads ":cc_config.bzl".
cd tensorflow-2.15.0 \
  && mkdir -p rk3308_toolchain \
  && touch rk3308_toolchain/BUILD rk3308_toolchain/cc_config.bzl

编辑BUILD ,填入以下内容

load(":cc_config.bzl", "cc_toolchain_config")

package(default_visibility = ["//visibility:public"])

licenses(["restricted"])  # GPLv3

# Toolchain suite selected via --crosstool_top; the "aarch64" key picks the
# cc_toolchain defined at the bottom of this file.
cc_toolchain_suite(
    name = "toolchain",
    toolchains = {
        "aarch64": ":cc-compiler-aarch64",
    },
)

# Placeholder for toolchain inputs that are intentionally empty (dwp_files).
filegroup(
    name = "empty",
    srcs = [],
)

# Every file the toolchain actions may need; forwards to the compiler_pieces
# target of the external @aarch64_linux_toolchain repository.
filegroup(
    name = "aarch64_toolchain_all_files",
    srcs = [
        "@aarch64_linux_toolchain//:compiler_pieces",
    ],
)

# Toolchain configuration produced by the rule defined in cc_config.bzl.
cc_toolchain_config(
    name = "aarch64_toolchain_config",
    cpu = "aarch64",
)

# The aarch64 cross toolchain; all file groups except dwp_files share the
# same input set.
cc_toolchain(
    name = "cc-compiler-aarch64",
    all_files = ":aarch64_toolchain_all_files",
    compiler_files = ":aarch64_toolchain_all_files",
    dwp_files = ":empty",
    linker_files = ":aarch64_toolchain_all_files",
    objcopy_files = ":aarch64_toolchain_all_files",
    strip_files = ":aarch64_toolchain_all_files",
    supports_param_files = 1,
    toolchain_config = ":aarch64_toolchain_config",
)

编辑cc_config.bzl ,填入以下内容

load("@bazel_tools//tools/cpp:cc_toolchain_config_lib.bzl",
"action_config",
"artifact_name_pattern",
"env_entry",
"env_set",
"feature",
"feature_set",
"flag_group",
"flag_set",
"make_variable",
"tool",
"tool_path",
"variable_with_value",
"with_feature_set",
)
load("@bazel_tools//tools/build_defs/cc:action_names.bzl", "ACTION_NAMES")

def _impl(ctx):
    """Builds the toolchain config for the Linaro GCC 7.5.0 aarch64 cross compiler.

    Args:
        ctx: Rule context. Used to declare the placeholder output file and
            passed through to cc_common.create_cc_toolchain_config_info.
            The rule's "cpu" attribute is not read here.

    Returns:
        A list containing the CcToolchainConfigInfo provider and a DefaultInfo
        whose executable is a placeholder file with the text "Fake executable".
    """

    # Single place to change when the cross toolchain is installed elsewhere.
    toolchain_root = "/home/jw/gcc-linaro-7.5.0-2019.12-x86_64_aarch64-linux-gnu"
    tool_prefix = toolchain_root + "/bin/aarch64-linux-gnu-"
    gcc_lib_dir = toolchain_root + "/lib/gcc/aarch64-linux-gnu/7.5.0"

    toolchain_identifier = "aarch64-linux-gnu"
    host_system_name = "x86_64-linux-gnu"  # build host
    target_system_name = "aarch64-linux-gnu"  # target system
    target_cpu = "aarch64"

    # NOTE(review): the three values below are all "aarch64". The original
    # notes suggest glibc / aapcs / the glibc version as more descriptive
    # values; confirm before changing them.
    target_libc = "aarch64"
    abi_version = "aarch64"
    abi_libc_version = "aarch64"

    compiler = "compiler"

    cc_target_os = None

    # No built-in sysroot is configured.
    builtin_sysroot = None

    all_compile_actions = [
        ACTION_NAMES.c_compile,
        ACTION_NAMES.cpp_compile,
        ACTION_NAMES.linkstamp_compile,
        ACTION_NAMES.assemble,
        ACTION_NAMES.preprocess_assemble,
        ACTION_NAMES.cpp_header_parsing,
        ACTION_NAMES.cpp_module_compile,
        ACTION_NAMES.cpp_module_codegen,
        ACTION_NAMES.clif_match,
        ACTION_NAMES.lto_backend,
    ]

    all_cpp_compile_actions = [
        ACTION_NAMES.cpp_compile,
        ACTION_NAMES.linkstamp_compile,
        ACTION_NAMES.cpp_header_parsing,
        ACTION_NAMES.cpp_module_compile,
        ACTION_NAMES.cpp_module_codegen,
        ACTION_NAMES.clif_match,
    ]

    all_link_actions = [
        ACTION_NAMES.cpp_link_executable,
        ACTION_NAMES.cpp_link_dynamic_library,
        ACTION_NAMES.cpp_link_nodeps_dynamic_library,
    ]

    # The sysroot flag applies to every compile action except plain
    # "assemble", plus all link actions.
    sysroot_actions = [
        action
        for action in all_compile_actions
        if action != ACTION_NAMES.assemble
    ] + all_link_actions

    # Header search paths of the cross toolchain (GCC internal headers,
    # libstdc++ headers, target libc headers).
    cxx_builtin_include_directories = [
        gcc_lib_dir + "/include",
        gcc_lib_dir + "/include-fixed",
        toolchain_root + "/aarch64-linux-gnu/include/c++/7.5.0",
        toolchain_root + "/aarch64-linux-gnu/libc/usr/include/",
    ]

    # Each built-in include directory is also passed explicitly as
    # "-isystem <dir>" to the C++ compile actions.
    isystem_flags = []
    for include_dir in cxx_builtin_include_directories:
        isystem_flags.extend(["-isystem", include_dir])

    action_configs = []

    opt_feature = feature(name = "opt")

    dbg_feature = feature(name = "dbg")

    # The feature is enabled, but --sysroot is only emitted when a "sysroot"
    # build variable is available (expand_if_available).
    sysroot_feature = feature(
        name = "sysroot",
        enabled = True,
        flag_sets = [
            flag_set(
                actions = sysroot_actions,
                flag_groups = [
                    flag_group(
                        flags = ["--sysroot=%{sysroot}"],
                        expand_if_available = "sysroot",
                    ),
                ],
            ),
        ],
    )

    # Flags that replace the date/time macros with fixed strings and disable
    # path canonicalization.
    unfiltered_compile_flags_feature = feature(
        name = "unfiltered_compile_flags",
        enabled = True,
        flag_sets = [
            flag_set(
                actions = all_compile_actions,
                flag_groups = [
                    flag_group(
                        flags = [
                            "-Wno-builtin-macro-redefined",
                            "-D__DATE__=\"redacted\"",
                            "-D__TIMESTAMP__=\"redacted\"",
                            "-D__TIME__=\"redacted\"",
                            "-no-canonical-prefixes",
                            "-fno-canonical-system-headers",
                        ],
                    ),
                ],
            ),
        ],
    )

    default_compile_flags_feature = feature(
        name = "default_compile_flags",
        enabled = True,
        flag_sets = [
            # Always on.
            flag_set(
                actions = all_compile_actions,
                flag_groups = [
                    flag_group(
                        flags = [
                            "-fstack-protector",
                        ],
                    ),
                ],
            ),
            # Only with the "dbg" feature (-c dbg).
            flag_set(
                actions = all_compile_actions,
                flag_groups = [flag_group(flags = ["-g"])],
                with_features = [with_feature_set(features = ["dbg"])],
            ),
            # Only with the "opt" feature (-c opt).
            flag_set(
                actions = all_compile_actions,
                flag_groups = [
                    flag_group(
                        flags = [
                            "-g0",
                            "-O2",
                            "-DNDEBUG",
                            "-ffunction-sections",
                            "-fdata-sections",
                        ],
                    ),
                ],
                with_features = [with_feature_set(features = ["opt"])],
            ),
            # Explicit system include paths for C++ compiles and LTO backend.
            flag_set(
                actions = all_cpp_compile_actions + [ACTION_NAMES.lto_backend],
                flag_groups = [
                    flag_group(
                        flags = isystem_flags,
                    ),
                ],
            ),
        ],
    )

    default_link_flags_feature = feature(
        name = "default_link_flags",
        enabled = True,
        flag_sets = [
            flag_set(
                actions = all_link_actions,
                flag_groups = [
                    flag_group(
                        flags = [
                            "-lstdc++",
                            "-Wl,-z,relro,-z,now",
                            "-no-canonical-prefixes",
                            "-pass-exit-codes",
                            "-Wl,--build-id=md5",
                            "-Wl,--hash-style=gnu",
                        ],
                    ),
                ],
            ),
            # Drop unused sections in optimized builds; pairs with the
            # -ffunction-sections/-fdata-sections compile flags above.
            flag_set(
                actions = all_link_actions,
                flag_groups = [flag_group(flags = ["-Wl,--gc-sections"])],
                with_features = [with_feature_set(features = ["opt"])],
            ),
        ],
    )

    supports_dynamic_linker_feature = feature(name = "supports_dynamic_linker", enabled = True)

    supports_pic_feature = feature(name = "supports_pic", enabled = True)

    # Forwards the "user_compile_flags" build variable, one flag per entry.
    user_compile_flags_feature = feature(
        name = "user_compile_flags",
        enabled = True,
        flag_sets = [
            flag_set(
                actions = all_compile_actions,
                flag_groups = [
                    flag_group(
                        flags = ["%{user_compile_flags}"],
                        iterate_over = "user_compile_flags",
                        expand_if_available = "user_compile_flags",
                    ),
                ],
            ),
        ],
    )

    features = [
        default_compile_flags_feature,
        default_link_flags_feature,
        supports_dynamic_linker_feature,
        supports_pic_feature,
        opt_feature,
        dbg_feature,
        user_compile_flags_feature,
        sysroot_feature,
        unfiltered_compile_flags_feature,
    ]

    artifact_name_patterns = []

    make_variables = []

    # All tools come from the cross toolchain's bin/ directory, except
    # compat-ld which is deliberately pointed at /bin/false.
    tool_paths = [
        tool_path(name = "ar", path = tool_prefix + "ar"),
        tool_path(name = "compat-ld", path = "/bin/false"),
        tool_path(name = "cpp", path = tool_prefix + "cpp"),
        tool_path(name = "dwp", path = tool_prefix + "dwp"),
        tool_path(name = "gcc", path = tool_prefix + "gcc"),
        tool_path(name = "gcov", path = tool_prefix + "gcov"),
        tool_path(name = "ld", path = tool_prefix + "ld"),
        tool_path(name = "nm", path = tool_prefix + "nm"),
        tool_path(name = "objcopy", path = tool_prefix + "objcopy"),
        tool_path(name = "objdump", path = tool_prefix + "objdump"),
        tool_path(name = "strip", path = tool_prefix + "strip"),
    ]

    # Placeholder output returned as the DefaultInfo executable below.
    out = ctx.actions.declare_file(ctx.label.name)
    ctx.actions.write(out, "Fake executable")
    return [
        cc_common.create_cc_toolchain_config_info(
            ctx = ctx,
            features = features,
            action_configs = action_configs,
            artifact_name_patterns = artifact_name_patterns,
            cxx_builtin_include_directories = cxx_builtin_include_directories,
            toolchain_identifier = toolchain_identifier,
            host_system_name = host_system_name,
            target_system_name = target_system_name,
            target_cpu = target_cpu,
            target_libc = target_libc,
            compiler = compiler,
            abi_version = abi_version,
            abi_libc_version = abi_libc_version,
            tool_paths = tool_paths,
            make_variables = make_variables,
            builtin_sysroot = builtin_sysroot,
            cc_target_os = cc_target_os,
        ),
        DefaultInfo(
            executable = out,
        ),
    ]

# Rule that exposes the toolchain configuration built by _impl.
# "cpu" is mandatory and restricted to "aarch64".
cc_toolchain_config = rule(
    implementation = _impl,
    attrs = {
        "cpu": attr.string(
            mandatory = True,
            values = ["aarch64"],
        ),
    },
    provides = [CcToolchainConfigInfo],
    executable = True,
)

编辑tensorflow-2.15.0/arm_compiler.BUILD(可选?)

将aarch64-none-linux-gnu换成aarch64-linux-gnu

检查工具链

bazel query //rk3308_toolchain:toolchain

编辑tensorflow-2.15.0/.bazelrc

build:rk3308 --crosstool_top=//rk3308_toolchain:toolchain
build:rk3308 --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
build:rk3308 --cpu=aarch64

运行

bazel build --config=rk3308 -c opt //tensorflow/lite:libtensorflowlite.so

报错

ERROR: /media/jw/PSSD/tensorflow-2.15.0/tensorflow/lite/kernels/internal/BUILD:448:11: Compiling tensorflow/lite/kernels/internal/optimized/4bit/neon_fully_connected.cc failed: (Exit 1): aarch64-linux-gnu-gcc failed: error executing command (from target //tensorflow/lite/kernels/internal:optimized_4bit) /home/jw/gcc-linaro-7.5.0-2019.12-x86_64_aarch64-linux-gnu/bin/aarch64-linux-gnu-gcc -fstack-protector -g0 -O2 -DNDEBUG -ffunction-sections -fdata-sections -isystem ... (remaining 59 arguments skipped)
tensorflow/lite/kernels/internal/optimized/4bit/neon_fully_connected.cc: In function 'void tflite::optimized_4bit::NeonAssignBiasAndComputeOffsets(const int32_t*, const float*, float*, const float*, float*, int, int)':
tensorflow/lite/kernels/internal/optimized/4bit/neon_fully_connected.cc:122:42: error: 'vld1q_f32_x4' was not declared in this scope
float32x4x4_t v0_to_v3_f32x4x4 = vld1q_f32_x4(filter_scales_ptr);
^~~~~~~~~~~~
tensorflow/lite/kernels/internal/optimized/4bit/neon_fully_connected.cc:122:42: note: suggested alternative: 'vld1q_f32'
float32x4x4_t v0_to_v3_f32x4x4 = vld1q_f32_x4(filter_scales_ptr);
^~~~~~~~~~~~
vld1q_f32
tensorflow/lite/kernels/internal/optimized/4bit/neon_fully_connected.cc:134:9: error: 'vst1q_f32_x4' was not declared in this scope
vst1q_f32_x4(output_ptr, v5_to_v8_f32x4x4);
^~~~~~~~~~~~
tensorflow/lite/kernels/internal/optimized/4bit/neon_fully_connected.cc:134:9: note: suggested alternative: 'vst1q_f32'
vst1q_f32_x4(output_ptr, v5_to_v8_f32x4x4);
^~~~~~~~~~~~
vst1q_f32
tensorflow/lite/kernels/internal/optimized/4bit/neon_fully_connected.cc:138:42: error: 'vld1q_f32_x2' was not declared in this scope
float32x4x2_t v0_to_v1_f32x4x2 = vld1q_f32_x2(filter_scales_ptr);
^~~~~~~~~~~~
tensorflow/lite/kernels/internal/optimized/4bit/neon_fully_connected.cc:138:42: note: suggested alternative: 'vld1q_f32'
float32x4x2_t v0_to_v1_f32x4x2 = vld1q_f32_x2(filter_scales_ptr);
^~~~~~~~~~~~
vld1q_f32
tensorflow/lite/kernels/internal/optimized/4bit/neon_fully_connected.cc:146:9: error: 'vst1q_f32_x2' was not declared in this scope
vst1q_f32_x2(output_ptr, v5_to_v6_f32x4x2);
^~~~~~~~~~~~
tensorflow/lite/kernels/internal/optimized/4bit/neon_fully_connected.cc:146:9: note: suggested alternative: 'vst1q_f32'
vst1q_f32_x2(output_ptr, v5_to_v6_f32x4x2);
^~~~~~~~~~~~
vst1q_f32
tensorflow/lite/kernels/internal/optimized/4bit/neon_fully_connected.cc:172:40: error: 'vld1q_f32_x4' was not declared in this scope
float32x4x4_t v0_to_v3_f32x4x4 = vld1q_f32_x4(filter_scales_ptr);
^~~~~~~~~~~~
tensorflow/lite/kernels/internal/optimized/4bit/neon_fully_connected.cc:172:40: note: suggested alternative: 'vld1q_f32'
float32x4x4_t v0_to_v3_f32x4x4 = vld1q_f32_x4(filter_scales_ptr);
^~~~~~~~~~~~
vld1q_f32
tensorflow/lite/kernels/internal/optimized/4bit/neon_fully_connected.cc:179:7: error: 'vst1q_f32_x4' was not declared in this scope
vst1q_f32_x4(output_ptr, v13_to_v16_f32x4x4);
^~~~~~~~~~~~
tensorflow/lite/kernels/internal/optimized/4bit/neon_fully_connected.cc:179:7: note: suggested alternative: 'vst1q_f32'
vst1q_f32_x4(output_ptr, v13_to_v16_f32x4x4);
^~~~~~~~~~~~
vst1q_f32
tensorflow/lite/kernels/internal/optimized/4bit/neon_fully_connected.cc:183:40: error: 'vld1q_f32_x2' was not declared in this scope
float32x4x2_t v0_to_v1_f32x4x2 = vld1q_f32_x2(filter_scales_ptr);
^~~~~~~~~~~~
tensorflow/lite/kernels/internal/optimized/4bit/neon_fully_connected.cc:183:40: note: suggested alternative: 'vld1q_f32'
float32x4x2_t v0_to_v1_f32x4x2 = vld1q_f32_x2(filter_scales_ptr);
^~~~~~~~~~~~
vld1q_f32
tensorflow/lite/kernels/internal/optimized/4bit/neon_fully_connected.cc:188:7: error: 'vst1q_f32_x2' was not declared in this scope
vst1q_f32_x2(output_ptr, v11_to_v12_f32x4x2);
^~~~~~~~~~~~
tensorflow/lite/kernels/internal/optimized/4bit/neon_fully_connected.cc:188:7: note: suggested alternative: 'vst1q_f32'
vst1q_f32_x2(output_ptr, v11_to_v12_f32x4x2);
^~~~~~~~~~~~
vst1q_f32
tensorflow/lite/kernels/internal/optimized/4bit/neon_fully_connected.cc: In function 'void tflite::optimized_4bit::NeonBatchQuantizeFloats4Bit(const float*, int, int, int8_t*, float*, int, int, int32_t*)':
tensorflow/lite/kernels/internal/optimized/4bit/neon_fully_connected.cc:382:44: error: 'vld1q_f32_x4' was not declared in this scope
const float32x4x4_t v1_f32x4x4 = vld1q_f32_x4(x1);
^~~~~~~~~~~~
tensorflow/lite/kernels/internal/optimized/4bit/neon_fully_connected.cc:382:44: note: suggested alternative: 'vld1q_f32'
const float32x4x4_t v1_f32x4x4 = vld1q_f32_x4(x1);
^~~~~~~~~~~~
vld1q_f32
tensorflow/lite/kernels/internal/optimized/4bit/neon_fully_connected.cc:404:44: error: 'vld1q_f32_x2' was not declared in this scope
const float32x4x2_t v1_f32x4x2 = vld1q_f32_x2(x1);
^~~~~~~~~~~~
tensorflow/lite/kernels/internal/optimized/4bit/neon_fully_connected.cc:404:44: note: suggested alternative: 'vld1q_f32'
const float32x4x2_t v1_f32x4x2 = vld1q_f32_x2(x1);
^~~~~~~~~~~~
vld1q_f32
Target //tensorflow/lite:libtensorflowlite.so failed to build
Use --verbose_failures to see the command lines of failed build steps.

bazel clean清空缓存后报错

ERROR: /home/jw/.cache/bazel/_bazel_jw/e63bf23c7ad9a33bf748ef6d0ba1dfc8/external/XNNPACK/BUILD.bazel:1420:19: Compiling src/amalgam/gen/neoni8mm.c failed: (Exit 1): aarch64-linux-gnu-gcc failed: error executing command (from target @XNNPACK//:neoni8mm_prod_microkernels) /home/jw/gcc-linaro-7.5.0-2019.12-x86_64_aarch64-linux-gnu/bin/aarch64-linux-gnu-gcc -fstack-protector -g0 -O2 -DNDEBUG -ffunction-sections -fdata-sections -MD -MF ... (remaining 43 arguments skipped)
cc1: error: invalid feature modifier in '-march=armv8.2-a+i8mm'
Target //tensorflow/lite:libtensorflowlite.so failed to build

通过cmake方式安装

官网地址: https://www.tensorflow.org/lite/guide/build_cmake_arm?hl=zh-cn

1.本地编译

cmake ../tensorflow_src/tensorflow/lite

实际测试,将所有a文件拷到离线设备上也无法使用

2.交叉编译

下载工具链

# Download the Linaro GCC 7.5.0 aarch64 cross toolchain, unpack it under DIR
# and add its bin/ directory to PATH.
# DIR is the install prefix. It defaults to $HOME, which is where the
# ARMCC_PREFIX used in the configure step expects the toolchain. Without a
# default, an unset DIR made the original `mv ... $DIR/` target the
# filesystem root.
DIR="${DIR:-$HOME}"
toolchain_name=gcc-linaro-7.5.0-2019.12-x86_64_aarch64-linux-gnu
wget "https://releases.linaro.org/components/toolchain/binaries/latest-7/aarch64-linux-gnu/${toolchain_name}.tar.xz"
mkdir -p -- "$DIR"
# Extract straight into DIR so no separate mv is needed.
tar xvJf "${toolchain_name}.tar.xz" -C "$DIR"
export PATH="$PATH:$DIR/${toolchain_name}/bin"

配置

ARMCC_PREFIX=${HOME}/gcc-linaro-7.5.0-2019.12-x86_64_aarch64-linux-gnu/bin/aarch64-linux-gnu-
ARMCC_FLAGS="-funsafe-math-optimizations"
cmake -DCMAKE_C_COMPILER=${ARMCC_PREFIX}gcc \
-DCMAKE_CXX_COMPILER=${ARMCC_PREFIX}g++ \
-DCMAKE_C_FLAGS="${ARMCC_FLAGS}" \
-DCMAKE_CXX_FLAGS="${ARMCC_FLAGS}" \
-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON \
-DCMAKE_SYSTEM_NAME=Linux \
-DCMAKE_SYSTEM_PROCESSOR=aarch64 \
../tensorflow/lite/

make

make -j8

或者

cmake --build . -j

注意: 此方式复制.a文件无法运行

cmake方式直接集成到项目

在CMakeLists.txt中添加代码

set(TENSORFLOW_SOURCE_DIR "" CACHE PATH
"Directory that contains the TensorFlow project"
)
if(NOT TENSORFLOW_SOURCE_DIR)
get_filename_component(TENSORFLOW_SOURCE_DIR
"${CMAKE_CURRENT_LIST_DIR}/../../../../"
ABSOLUTE
)
endif()

add_subdirectory(
"${TENSORFLOW_SOURCE_DIR}/tensorflow/lite"
"${CMAKE_CURRENT_BINARY_DIR}/tensorflow-lite"
EXCLUDE_FROM_ALL
)

set(CMAKE_CXX_STANDARD 11)

设置 TENSORFLOW_SOURCE_DIR 为自己的tensorflow路径
然后直接编译

mkdir build 
cd build
cmake ..
make -j8

交叉编译

export cross_compile_toolchain=/home/jw/gcc-linaro-7.5.0-2019.12-x86_64_aarch64-linux-gnu

mkdir -p build_arm && cd build_arm

cmake .. \
-DCMAKE_SYSTEM_NAME=Linux \
-DCMAKE_SYSTEM_VERSION=1 \
-DCMAKE_SYSTEM_PROCESSOR=aarch64 \
-DCMAKE_C_COMPILER=$cross_compile_toolchain/bin/aarch64-linux-gnu-gcc \
-DCMAKE_CXX_COMPILER=$cross_compile_toolchain/bin/aarch64-linux-gnu-g++ \
-DCMAKE_BUILD_TYPE=release

make -j4

测试发现如果程序本身要生成动态库,构建时也会自动生成依赖的动态库(.so),可用下面的命令将这些动态库保存到lib文件夹内(路径按实际构建目录替换)

find ./build -name "*.so*" -exec cp {} /path/to/save/directory/ \;

cmake 方式报错解决

错误内容: cc1: error: invalid feature modifier in ‘-march=armv8.2-a+i8mm+fp16’
或者cc1: error: invalid feature modifier in ‘-march=armv8.2-a+bf16’
该报错来自编译器本身:所用的交叉编译器(GCC 7.5/8.3)版本较旧,不认识 -march 中的 i8mm、bf16 等特性修饰符(GCC 10 起才支持)。同时目标板子也不支持i8mm,fp16,bf16,所以编译时通过 XNNPACK 选项去掉这些特性

ARMCC_PREFIX=${HOME}/toolchains/gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu/bin/aarch64-linux-gnu-
ARMCC_FLAGS="-funsafe-math-optimizations"
cmake -DCMAKE_C_COMPILER=${ARMCC_PREFIX}gcc \
-DCMAKE_CXX_COMPILER=${ARMCC_PREFIX}g++ \
-DCMAKE_C_FLAGS="${ARMCC_FLAGS}" \
-DCMAKE_CXX_FLAGS="${ARMCC_FLAGS}" \
-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON \
-DCMAKE_SYSTEM_NAME=Linux \
-DCMAKE_SYSTEM_PROCESSOR=aarch64 \
-DXNNPACK_ENABLE_ARM_BF16=OFF \
-DXNNPACK_ENABLE_ARM_I8MM=OFF \
../tensorflow/lite

测试发现tensorflow2.16.1编译时无法去掉,将版本改为2.15.0,成功编译,也可以运行2.16.1转换的模型
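此方式编译的libtensorflow-lite.a导入到项目编译报错(下面未定义的符号分别来自 flatbuffers、farmhash(util::Fingerprint64)和 ruy:libtensorflow-lite.a 本身不包含这些依赖库的代码,链接时需要把构建目录 _deps 下生成的各依赖静态库一并链接,或者改用 add_subdirectory 集成并 target_link_libraries 到 tensorflow-lite 目标,由 CMake 自动带上依赖)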

oDL_c.so: undefined reference to `flatbuffers::ClassicLocale::instance_'
/home/jw/code/AudioDL/runtime/libaudioDL_demo/lib/aarch64/libaudioDL_c.so: undefined reference to `util::Fingerprint64(char const*, unsigned long)'
/home/jw/code/AudioDL/runtime/libaudioDL_demo/lib/aarch64/libaudioDL_c.so: undefined reference to `ruy::KernelFloatNeon(ruy::KernelParamsFloat<8, 8> const&)'
/home/jw/code/AudioDL/runtime/libaudioDL_demo/lib/aarch64/libaudioDL_c.so: undefined reference to `ruy::Context::set_max_num_threads(int)'

尝试minimal项目交叉编译gcc7.5.0版本

cd tensorflow-2.15.0/tensorflow/lite/examples/minimal
mkdir build_aarch64 && cd build_aarch64

ARMCC_PREFIX=/home/jw/gcc-linaro-7.5.0-2019.12-x86_64_aarch64-linux-gnu/bin/aarch64-linux-gnu-
ARMCC_FLAGS="-funsafe-math-optimizations"
proxychains4 cmake -DCMAKE_C_COMPILER=${ARMCC_PREFIX}gcc \
-DCMAKE_CXX_COMPILER=${ARMCC_PREFIX}g++ \
-DCMAKE_C_FLAGS="${ARMCC_FLAGS}" \
-DCMAKE_CXX_FLAGS="${ARMCC_FLAGS}" \
-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON \
-DCMAKE_SYSTEM_NAME=Linux \
-DCMAKE_SYSTEM_PROCESSOR=aarch64 \
-DXNNPACK_ENABLE_ARM_BF16=OFF \
-DXNNPACK_ENABLE_ARM_I8MM=OFF \
..
make -j8

发现一样报错

oDL_c.so: undefined reference to `flatbuffers::ClassicLocale::instance_'
/home/jw/code/AudioDL/runtime/libaudioDL_demo/lib/aarch64/libaudioDL_c.so: undefined reference to `util::Fingerprint64(char const*, unsigned long)'
/home/jw/code/AudioDL/runtime/libaudioDL_demo/lib/aarch64/libaudioDL_c.so: undefined reference to `ruy::KernelFloatNeon(ruy::KernelParamsFloat<8, 8> const&)'
/home/jw/code/AudioDL/runtime/libaudioDL_demo/lib/aarch64/libaudioDL_c.so: undefined reference to `ruy::Context::set_max_num_threads(int)'

------ 本文结束 🎉🎉 谢谢观看 ------
0%