From 2afc1e9f094c1a50337809810b2c5ad5400ac46e Mon Sep 17 00:00:00 2001 From: "@mr-zql" Date: Wed, 12 Nov 2025 11:54:05 +0800 Subject: [PATCH 1/3] add Scatter operator sample --- .../AclNNInvocation/README.md | 55 +++ .../AclNNInvocation/inc/common.h | 45 ++ .../AclNNInvocation/inc/op_runner.h | 181 +++++++ .../AclNNInvocation/inc/operator_desc.h | 57 +++ .../AclNNInvocation/input/.keep | 0 .../AclNNInvocation/run.sh | 75 +++ .../AclNNInvocation/scripts/acl.json | 1 + .../AclNNInvocation/scripts/gen_data.py | 24 + .../AclNNInvocation/scripts/verify_result.py | 53 ++ .../AclNNInvocation/src/CMakeLists.txt | 65 +++ .../AclNNInvocation/src/common.cpp | 80 +++ .../AclNNInvocation/src/main.cpp | 163 +++++++ .../AclNNInvocation/src/op_runner.cpp | 457 ++++++++++++++++++ .../AclNNInvocation/src/operator_desc.cpp | 51 ++ .../28_scatter_frameworklaunch/README.md | 109 +++++ .../ScatterCustom.json | 39 ++ .../ScatterCustom/op_host/scatter_custom.cpp | 69 +++ .../op_host/scatter_custom_tiling.h | 22 + .../op_kernel/scatter_custom.cpp | 92 ++++ .../28_scatter_frameworklaunch/install.sh | 54 +++ operator/ascendc/0_introduction/README.md | 1 + 21 files changed, 1693 insertions(+) create mode 100644 operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/README.md create mode 100644 operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/inc/common.h create mode 100644 operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/inc/op_runner.h create mode 100644 operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/inc/operator_desc.h create mode 100644 operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/input/.keep create mode 100644 operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/run.sh create mode 100644 operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/scripts/acl.json create mode 100644 operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/scripts/gen_data.py create mode 100644 operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/scripts/verify_result.py create mode 100644 operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/src/CMakeLists.txt create mode 100644 operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/src/common.cpp create mode 100644 operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/src/main.cpp create mode 100644 operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/src/op_runner.cpp create mode 100644 operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/src/operator_desc.cpp create mode 100644 operator/ascendc/0_introduction/28_scatter_frameworklaunch/README.md create mode 100644 operator/ascendc/0_introduction/28_scatter_frameworklaunch/ScatterCustom.json create mode 100644 operator/ascendc/0_introduction/28_scatter_frameworklaunch/ScatterCustom/op_host/scatter_custom.cpp create mode 100644 operator/ascendc/0_introduction/28_scatter_frameworklaunch/ScatterCustom/op_host/scatter_custom_tiling.h create mode 100644 operator/ascendc/0_introduction/28_scatter_frameworklaunch/ScatterCustom/op_kernel/scatter_custom.cpp create mode 100644 operator/ascendc/0_introduction/28_scatter_frameworklaunch/install.sh diff --git a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/README.md b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/README.md new 
file mode 100644 index 000000000..7ec773047 --- /dev/null +++ b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/README.md @@ -0,0 +1,55 @@ +## 目录结构介绍 +``` +├── AclNNInvocation //通过aclnn调用的方式调用ScatterCustom算子 +│ ├── inc // 头文件目录 +│ │ ├── common.h // 声明公共方法类,用于读取二进制文件 +│ │ ├── op_runner.h // 算子描述声明文件,包含算子输入/输出,算子类型以及输入描述与输出描述 +│ │ └── operator_desc.h // 算子运行相关信息声明文件,包含算子输入/输出个数,输入/输出大小等 +│ ├── input // 存放脚本生成的输入数据目录 +│ ├── output // 存放算子运行输出数据和真值数据的目录 +│ ├── scripts +│ │ ├── acl.json // acl配置文件 +│ │ ├── gen_data.py // 输入数据和真值数据生成脚本 +│ │ └── verify_result.py // 真值对比文件 +│ ├── src +│ │ ├── CMakeLists.txt // 编译规则文件 +│ │ ├── common.cpp // 公共函数,读取二进制文件函数的实现文件 +│ │ ├── main.cpp // 单算子调用应用的入口 +│ │ ├── op_runner.cpp // 单算子调用主体流程实现文件 +│ │ └── operator_desc.cpp // 构造算子的输入与输出描述 +│ └── run.sh // 执行命令脚本 +``` +## 代码实现介绍 +完成自定义算子的开发部署后,可以通过单算子调用的方式来验证单算子的功能。src/main.cpp代码为单算子API执行方式。单算子API执行是基于C语言的API执行算子,无需提供单算子描述文件进行离线模型的转换,直接调用单算子API接口。 + +自定义算子编译部署后,会自动生成单算子API,可以直接在应用程序中调用。算子API的形式一般定义为“两段式接口”,形如: + ```cpp + // 获取算子使用的workspace空间大小 + aclnnStatus aclnnScatterCustomGetWorkspaceSize(const aclTensor *x, const aclTensor *y, const aclTensor *out, uint64_t *workspaceSize, aclOpExecutor **executor); + // 执行算子 + aclnnStatus aclnnScatterCustom(void *workspace, int64_t workspaceSize, aclOpExecutor *executor, aclrtStream stream); + ``` +其中aclnnScatterCustomGetWorkspaceSize为第一段接口,主要用于计算本次API调用计算过程中需要多少的workspace内存。获取到本次API计算需要的workspace大小之后,开发者可以按照workspaceSize大小申请Device侧内存,然后调用第二段接口aclnnScatterCustom执行计算。具体参考[单算子API调用](https://hiascend.com/document/redirect/CannCommunityAscendCInVorkSingleOp)章节。 + +## 运行样例算子 +### 1. 编译算子工程 +运行此样例前,请参考[编译算子工程](../README.md#operatorcompile)完成前期准备。 +### 2. aclnn调用样例运行 + + - 进入到样例目录 + 以命令行方式下载样例代码,master分支为例。 + ```bash + cd ${git_clone_path}/samples/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation + ``` + - 样例执行 + + 样例执行过程中会自动生成测试数据,然后编译与运行aclnn样例,最后检验运行结果。具体过程可参见run.sh脚本。 + + ```bash + bash run.sh + ``` +## 更新说明 +| 时间 | 更新事项 | +| ---------- | ------------ | +| 2025/11/12 | 新增本readme | +| 2025/11/12 | 样例目录调整 | \ No newline at end of file diff --git a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/inc/common.h b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/inc/common.h new file mode 100644 index 000000000..11bb4aeca --- /dev/null +++ b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/inc/common.h @@ -0,0 +1,45 @@ +/** + * @file common.h + * + * Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#ifndef COMMON_H +#define COMMON_H + +#include +#include +#include +#include +#include + +#include "acl/acl.h" + +#define SUCCESS 0 +#define FAILED 1 + +#define INFO_LOG(fmt, args...) fprintf(stdout, "[INFO] " fmt "\n", ##args) +#define WARN_LOG(fmt, args...) fprintf(stdout, "[WARN] " fmt "\n", ##args) +#define ERROR_LOG(fmt, args...) 
fprintf(stderr, "[ERROR] " fmt "\n", ##args) + +/** + * @brief Read data from file + * @param [in] filePath: file path + * @param [out] fileSize: file size + * @return read result + */ +bool ReadFile(const std::string &filePath, size_t fileSize, void *buffer, size_t bufferSize); + +/** + * @brief Write data to file + * @param [in] filePath: file path + * @param [in] buffer: data to write to file + * @param [in] size: size to write + * @return write result + */ +bool WriteFile(const std::string &filePath, const void *buffer, size_t size); + +#endif // COMMON_H diff --git a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/inc/op_runner.h b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/inc/op_runner.h new file mode 100644 index 000000000..f1b3a6706 --- /dev/null +++ b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/inc/op_runner.h @@ -0,0 +1,181 @@ +/** + * @file op_runner.h + * + * Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#ifndef OP_RUNNER_H +#define OP_RUNNER_H + +#include "acl/acl.h" +#include "aclnn/acl_meta.h" +#include "common.h" +#include "operator_desc.h" + +/** + * Op Runner + */ +class OpRunner { +public: + /** + * @brief Constructor + * @param [in] opDesc: op description + */ + explicit OpRunner(OperatorDesc *opDesc); + + /** + * @brief Destructor + */ + virtual ~OpRunner(); + + /** + * @brief Init op runner + */ + bool Init(); + + /** + * @brief Get number of inputs + * @return number of inputs + */ + const size_t NumInputs(); + + /** + * @brief Get number of outputs + * @return number of outputs + */ + const size_t NumOutputs(); + + /** + * @brief Get input size by index + * @param [in] index: input index + * @return size of the input + */ + const size_t GetInputSize(size_t index) const; + const size_t GetInputNumDims(size_t index) const; + aclDataType GetInputDataType(size_t index) const; + aclFormat GetInputFormat(size_t index) const; + + /** + * @brief Get output size by index + * @param [in] index: output index + * @return size of the output + */ + size_t GetOutputSize(size_t index) const; + const size_t GetOutputNumDims(size_t index) const; + aclDataType GetOutputDataType(size_t index) const; + aclFormat GetOutputFormat(size_t index) const; + + /** + * @brief Get input element count by index + * @param i[in] ndex: input index + * @return element count of the input + */ + size_t GetInputElementCount(size_t index) const; + + /** + * @brief Get output element count by index + * @param [in] index: output index + * @return element count of the output + */ + size_t GetOutputElementCount(size_t index) const; + + /** + * @brief Get input shape by index + * @param [in] index: input index + * @return shape of the output + */ + std::vector GetInputShape(size_t index) const; + + /** + * @brief Get output shape by index + * @param [in] index: output index + * @return shape of the output + */ + std::vector GetOutputShape(size_t index) const; + + /** + * @brief Get input buffer(host memory) by index + * @tparam T: data type + * @param [in] index: input index + * @return host address of the input + */ + template T *GetInputBuffer(size_t index) + { + if (index >= numInputs_) { + ERROR_LOG("index out of range. 
index = %zu, numInputs = %zu", index, numInputs_); + return nullptr; + } + return reinterpret_cast(hostInputs_[index]); + } + + /** + * @brief Get output buffer(host memory) by index + * @tparam T: data type + * @param [in] index: output index + * @return host address of the output + */ + template const T *GetOutputBuffer(size_t index) + { + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return nullptr; + } + + return reinterpret_cast(hostOutputs_[index]); + } + + /** + * @brief Print readable input by index + * @param [in] index: input index + * @param [in] elementsPerRow: number of elements per row + */ + void PrintInput(size_t index, size_t elementsPerRow = 16); + + /** + * @brief Print readable output by index + * @param [in] index: output index + * @param [in] elementsPerRow: number of elements per row + */ + void PrintOutput(size_t index, size_t elementsPerRow = 16); + + /** + * @brief Compile static op + * @return compile result + */ + bool CompileStaticOp(); + + /** + * @brief Compile dynamic op + * @return compile result + */ + bool CompileDynamicOp(); + + /** + * @brief Run op + * @return run result + */ + bool RunOp(); + +private: + size_t numInputs_; + size_t numOutputs_; + void *workspace_; + + std::vector inputBuffers_; + std::vector outputBuffers_; + + std::vector devInputs_; + std::vector devOutputs_; + + std::vector hostInputs_; + std::vector hostOutputs_; + + std::vector inputTensor_; + std::vector outputTensor_; + OperatorDesc *opDesc_; +}; + +#endif // OP_RUNNER_H diff --git a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/inc/operator_desc.h b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/inc/operator_desc.h new file mode 100644 index 000000000..6d8ee0905 --- /dev/null +++ b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/inc/operator_desc.h @@ -0,0 +1,57 @@ +/** + * @file operator_desc.h + * + * Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ */ +#ifndef OPERATOR_DESC_H +#define OPERATOR_DESC_H + +#include +#include + +#include "acl/acl.h" + +/** + * Op description + */ +struct OperatorDesc { + /** + * Constructor + */ + explicit OperatorDesc(); + + /** + * Destructor + */ + virtual ~OperatorDesc(); + + /** + * Add an input tensor description + * @param [in] dataType: data type + * @param [in] numDims: number of dims + * @param [in] dims: dims + * @param [in] format: format + * @return OperatorDesc + */ + OperatorDesc &AddInputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, aclFormat format); + + /** + * Add an output tensor description + * @param [in] dataType: data type + * @param [in] numDims: number of dims + * @param [in] dims: dims + * @param [in] format: format + * @return OperatorDesc + */ + OperatorDesc &AddOutputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, aclFormat format); + + std::string opType; + std::vector inputDesc; + std::vector outputDesc; +}; + +#endif // OPERATOR_DESC_H diff --git a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/input/.keep b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/input/.keep new file mode 100644 index 000000000..e69de29bb diff --git a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/run.sh b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/run.sh new file mode 100644 index 000000000..215f48cee --- /dev/null +++ b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/run.sh @@ -0,0 +1,75 @@ +#!/bin/bash +CURRENT_DIR=$( + cd $(dirname ${BASH_SOURCE:-$0}) + pwd +) + +if [ -n "$ASCEND_INSTALL_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH +elif [ -n "$ASCEND_HOME_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH +else + if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then + _ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + else + _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + fi +fi +source $_ASCEND_INSTALL_PATH/bin/setenv.bash +export DDK_PATH=$_ASCEND_INSTALL_PATH +export NPU_HOST_LIB=$_ASCEND_INSTALL_PATH/$(arch)-$(uname -s | tr '[:upper:]' '[:lower:]')/devlib + +function main { + # 1. 清除遗留生成文件和日志文件 + rm -rf $HOME/ascend/log/* + rm ./input/*.bin + rm ./output/*.bin + + # 2. 生成输入数据和真值数据 + cd $CURRENT_DIR + python3 scripts/gen_data.py + if [ $? -ne 0 ]; then + echo "[ERROR]: Generate input data failed!" + return 1 + fi + echo "[INFO]: Generate input data success!" + + # 3. 编译可执行文件 + cd $CURRENT_DIR + rm -rf build + mkdir -p build + cd build + cmake ../src -DCMAKE_SKIP_RPATH=TRUE + if [ $? -ne 0 ]; then + echo "[ERROR]: Cmake failed!" + return 1 + fi + echo "[INFO]: Cmake success!" + make + if [ $? -ne 0 ]; then + echo "[ERROR]: Make failed!" + return 1 + fi + echo "[INFO]: Make success!" + + # 4. 运行可执行文件 + export LD_LIBRARY_PATH=$_ASCEND_INSTALL_PATH/opp/vendors/customize/op_api/lib:$LD_LIBRARY_PATH + cd $CURRENT_DIR/output + echo "[INFO]: Execute op!" + ./execute_add_op + if [ $? -ne 0 ]; then + echo "[ERROR]: Acl executable run failed! please check your project!" + return 1 + fi + echo "[INFO]: Acl executable run success!" + + # 5. 精度比对 + cd $CURRENT_DIR + python3 scripts/verify_result.py output/output_dst.bin output/dst.bin + if [ $? -ne 0 ]; then + echo "[ERROR]: Verify result failed!" 
+ return 1 + fi +} + +main diff --git a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/scripts/acl.json b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/scripts/acl.json new file mode 100644 index 000000000..9e26dfeeb --- /dev/null +++ b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/scripts/acl.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/scripts/gen_data.py b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/scripts/gen_data.py new file mode 100644 index 000000000..6483833c3 --- /dev/null +++ b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/scripts/gen_data.py @@ -0,0 +1,24 @@ +#!/usr/bin/python3 +# coding=utf-8 +# +# Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# =============================================================================== + +import numpy as np + + +def gen_golden_data_simple(): + src = np.arange(128, dtype = np.float16) + dst_Offset = np.arange(254, -1, -2, dtype = np.uint32) + dst = np.arange(127, -1, -1, dtype = np.float16) + src.tofile("./input/src.bin") + dst_Offset.tofile("./input/dst_Offset.bin") + dst.tofile("./output/dst.bin") + + +if __name__ == "__main__": + gen_golden_data_simple() diff --git a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/scripts/verify_result.py b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/scripts/verify_result.py new file mode 100644 index 000000000..2dd46f803 --- /dev/null +++ b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/scripts/verify_result.py @@ -0,0 +1,53 @@ +#!/usr/bin/python3 +# coding=utf-8 +# +# Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+# =============================================================================== + +import sys +import numpy as np + +# for float16 +relative_tol = 1e-3 +absolute_tol = 1e-5 +error_tol = 1e-3 + + +def verify_result(output, golden): + output = np.fromfile(output, dtype=np.float16).reshape(-1) + golden = np.fromfile(golden, dtype=np.float16).reshape(-1) + different_element_results = np.isclose(output, + golden, + rtol=relative_tol, + atol=absolute_tol, + equal_nan=True) + different_element_indexes = np.where(different_element_results == False)[0] + for index in range(len(different_element_indexes)): + real_index = different_element_indexes[index] + golden_data = golden[real_index] + output_data = output[real_index] + print( + "data index: %06d, expected: %-.9f, actual: %-.9f, rdiff: %-.6f" % + (real_index, golden_data, output_data, + abs(output_data - golden_data) / golden_data)) + if index == 100: + break + error_ratio = float(different_element_indexes.size) / golden.size + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) + return error_ratio <= error_tol + + +if __name__ == '__main__': + try: + res = verify_result(sys.argv[1], sys.argv[2]) + if not res: + raise ValueError("[ERROR] result error") + else: + print("test pass") + except Exception as e: + print(e) + sys.exit(1) diff --git a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/src/CMakeLists.txt b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/src/CMakeLists.txt new file mode 100644 index 000000000..8e9e45375 --- /dev/null +++ b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/src/CMakeLists.txt @@ -0,0 +1,65 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2020. All rights reserved. + +# CMake lowest version requirement +cmake_minimum_required(VERSION 3.5.1) + +# project information +project(acl_execute_add) + +# Compile options +add_compile_options(-std=c++11) + +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "../output") +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "../output") + +set(INC_PATH $ENV{DDK_PATH}) + +if (NOT DEFINED ENV{DDK_PATH}) + set(INC_PATH "/usr/local/Ascend/ascend-toolkit/latest") + message(STATUS "set default INC_PATH: ${INC_PATH}") +else () + message(STATUS "env INC_PATH: ${INC_PATH}") +endif() + +set(CUST_PKG_PATH "${INC_PATH}/opp/vendors/customize/op_api") + +set(LIB_PATH $ENV{NPU_HOST_LIB}) + +# Dynamic libraries in the stub directory can only be used for compilation +if (NOT DEFINED ENV{NPU_HOST_LIB}) + string(TOLOWER "${CMAKE_SYSTEM_NAME}" SYSTEM_NAME_LOWER) + set(LIB_PATH "/usr/local/Ascend/ascend-toolkit/latest/${CMAKE_SYSTEM_PROCESSOR}-${SYSTEM_NAME_LOWER}/devlib") + message(STATUS "set default LIB_PATH: ${LIB_PATH}") +else () + message(STATUS "env LIB_PATH: ${LIB_PATH}") +endif() + +# Header path +include_directories( + ../inc + ${INC_PATH}/include + ${CUST_PKG_PATH}/include +) + +# add host lib path +link_directories( + ${LIB_PATH} + ${CUST_PKG_PATH}/lib +) + +add_executable(execute_add_op + operator_desc.cpp + op_runner.cpp + main.cpp + common.cpp +) + +target_link_libraries(execute_add_op + ascendcl + cust_opapi + acl_op_compiler + nnopbase + stdc++ +) + +install(TARGETS execute_add_op DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) diff --git a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/src/common.cpp b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/src/common.cpp new file mode 100644 index 000000000..992759c95 --- /dev/null +++ 
b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/src/common.cpp @@ -0,0 +1,80 @@ +/** + * @file common.cpp + * + * Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#include "common.h" + +#include +#include +#include + +#include + +extern bool g_isDevice; + +bool ReadFile(const std::string &filePath, size_t fileSize, void *buffer, size_t bufferSize) +{ + struct stat sBuf; + int fileStatus = stat(filePath.data(), &sBuf); + if (fileStatus == -1) { + ERROR_LOG("failed to get file %s", filePath.c_str()); + return false; + } + if (S_ISREG(sBuf.st_mode) == 0) { + ERROR_LOG("%s is not a file, please enter a file", filePath.c_str()); + return false; + } + + std::ifstream file; + file.open(filePath, std::ios::binary); + if (!file.is_open()) { + ERROR_LOG("Open file failed. path = %s", filePath.c_str()); + return false; + } + + std::filebuf *buf = file.rdbuf(); + size_t size = buf->pubseekoff(0, std::ios::end, std::ios::in); + if (size == 0) { + ERROR_LOG("file size is 0"); + file.close(); + return false; + } + if (size > bufferSize) { + ERROR_LOG("file size is larger than buffer size"); + file.close(); + return false; + } + buf->pubseekpos(0, std::ios::in); + buf->sgetn(static_cast(buffer), size); + fileSize = size; + file.close(); + return true; +} + +bool WriteFile(const std::string &filePath, const void *buffer, size_t size) +{ + if (buffer == nullptr) { + ERROR_LOG("Write file failed. buffer is nullptr"); + return false; + } + + int fd = open(filePath.c_str(), O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWRITE); + if (fd < 0) { + ERROR_LOG("Open file failed. path = %s", filePath.c_str()); + return false; + } + + size_t writeSize = write(fd, buffer, size); + (void)close(fd); + if (writeSize != size) { + ERROR_LOG("Write file Failed."); + return false; + } + + return true; +} diff --git a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/src/main.cpp b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/src/main.cpp new file mode 100644 index 000000000..e864046b6 --- /dev/null +++ b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/src/main.cpp @@ -0,0 +1,163 @@ +/** + * @file main.cpp + * + * Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ */ +#include +#include +#include + +#include +#include + +#include "acl/acl.h" +#include "common.h" +#include "op_runner.h" + +bool g_isDevice = false; +int deviceId = 0; + +OperatorDesc CreateOpDesc() +{ + // define operator + std::vector shape{128}; + aclDataType dataType = ACL_FLOAT16; + aclDataType dataType1 = ACL_UINT32; + aclFormat format = ACL_FORMAT_ND; + OperatorDesc opDesc; + opDesc.AddInputTensorDesc(dataType, shape.size(), shape.data(), format); + opDesc.AddInputTensorDesc(dataType1, shape.size(), shape.data(), format); + opDesc.AddOutputTensorDesc(dataType, shape.size(), shape.data(), format); + return opDesc; +} + +bool SetInputData(OpRunner &runner) +{ + size_t fileSize = 0; + ReadFile("../input/src.bin", fileSize, runner.GetInputBuffer(0), runner.GetInputSize(0)); + ReadFile("../input/dst_Offset.bin", fileSize, runner.GetInputBuffer(1), runner.GetInputSize(1)); + INFO_LOG("Set input success"); + return true; +} + +bool ProcessOutputData(OpRunner &runner) +{ + WriteFile("../output/output_dst.bin", runner.GetOutputBuffer(0), runner.GetOutputSize(0)); + INFO_LOG("Write output success"); + return true; +} + +void DestroyResource() +{ + bool flag = false; + if (aclrtResetDevice(deviceId) != ACL_SUCCESS) { + ERROR_LOG("Reset device %d failed", deviceId); + flag = true; + } + INFO_LOG("Reset Device success"); + if (aclFinalize() != ACL_SUCCESS) { + ERROR_LOG("Finalize acl failed"); + flag = true; + } + if (flag) { + ERROR_LOG("Destroy resource failed"); + } else { + INFO_LOG("Destroy resource success"); + } +} + +bool InitResource() +{ + std::string output = "../output"; + if (access(output.c_str(), 0) == -1) { + int ret = mkdir(output.c_str(), 0700); + if (ret == 0) { + INFO_LOG("Make output directory successfully"); + } else { + ERROR_LOG("Make output directory fail"); + return false; + } + } + + // acl.json is dump or profiling config file + if (aclInit("../scripts/acl.json") != ACL_SUCCESS) { + ERROR_LOG("acl init failed"); + return false; + } + + if (aclrtSetDevice(deviceId) != ACL_SUCCESS) { + ERROR_LOG("Set device failed. 
deviceId is %d", deviceId); + (void)aclFinalize(); + return false; + } + INFO_LOG("Set device[%d] success", deviceId); + + // runMode is ACL_HOST which represents app is running in host + // runMode is ACL_DEVICE which represents app is running in device + aclrtRunMode runMode; + if (aclrtGetRunMode(&runMode) != ACL_SUCCESS) { + ERROR_LOG("Get run mode failed"); + DestroyResource(); + return false; + } + g_isDevice = (runMode == ACL_DEVICE); + INFO_LOG("Get RunMode[%d] success", runMode); + + return true; +} + +bool RunOp() +{ + // create op desc + OperatorDesc opDesc = CreateOpDesc(); + + // create Runner + OpRunner opRunner(&opDesc); + if (!opRunner.Init()) { + ERROR_LOG("Init OpRunner failed"); + return false; + } + + // Load inputs + if (!SetInputData(opRunner)) { + ERROR_LOG("Set input data failed"); + return false; + } + + // Run op + if (!opRunner.RunOp()) { + ERROR_LOG("Run op failed"); + return false; + } + + // process output data + if (!ProcessOutputData(opRunner)) { + ERROR_LOG("Process output data failed"); + return false; + } + + INFO_LOG("Run op success"); + return true; +} + +int main(int argc, char **argv) +{ + if (!InitResource()) { + ERROR_LOG("Init resource failed"); + return FAILED; + } + INFO_LOG("Init resource success"); + + if (!RunOp()) { + DestroyResource(); + return FAILED; + } + + DestroyResource(); + + return SUCCESS; +} diff --git a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/src/op_runner.cpp b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/src/op_runner.cpp new file mode 100644 index 000000000..5166558dd --- /dev/null +++ b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/src/op_runner.cpp @@ -0,0 +1,457 @@ +/** + * @file op_runner.cpp + * + * Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ */ +#include "op_runner.h" + +#include +#include + +#include "acl/acl_op_compiler.h" +#include "aclnn_scatter_custom.h" +#include "common.h" + +using namespace std; + +extern bool g_isDevice; + +OpRunner::OpRunner(OperatorDesc *opDesc) : opDesc_(opDesc) +{ + numInputs_ = opDesc->inputDesc.size(); + numOutputs_ = opDesc->outputDesc.size(); + workspace_ = nullptr; +} + +OpRunner::~OpRunner() +{ + if (workspace_ != nullptr) { + (void)aclrtFree(workspace_); + } + for (size_t i = 0; i < numInputs_; ++i) { + (void)aclDestroyTensor(inputTensor_[i]); + (void)aclDestroyDataBuffer(inputBuffers_[i]); + (void)aclrtFree(devInputs_[i]); + if (g_isDevice) { + (void)aclrtFree(hostInputs_[i]); + } else { + (void)aclrtFreeHost(hostInputs_[i]); + } + } + + for (size_t i = 0; i < numOutputs_; ++i) { + (void)aclDestroyTensor(outputTensor_[i]); + (void)aclDestroyDataBuffer(outputBuffers_[i]); + (void)aclrtFree(devOutputs_[i]); + if (g_isDevice) { + (void)aclrtFree(hostOutputs_[i]); + } else { + (void)aclrtFreeHost(hostOutputs_[i]); + } + } +} + +bool OpRunner::Init() +{ + for (size_t i = 0; i < numInputs_; ++i) { + auto size = GetInputSize(i); + void *devMem = nullptr; + if (aclrtMalloc(&devMem, size, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory for input[%zu] failed", i); + return false; + } + devInputs_.emplace_back(devMem); + inputBuffers_.emplace_back(aclCreateDataBuffer(devMem, size)); + + void *hostInput = nullptr; + if (g_isDevice) { + if (aclrtMalloc(&hostInput, size, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory for input[%zu] failed", i); + return false; + } + } else { + if (aclrtMallocHost(&hostInput, size) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory for input[%zu] failed", i); + return false; + } + } + if (hostInput == nullptr) { + ERROR_LOG("Malloc memory for input[%zu] failed", i); + return false; + } + hostInputs_.emplace_back(hostInput); + + aclTensor *inputTensor = + aclCreateTensor(GetInputShape(i).data(), GetInputNumDims(i), GetInputDataType(i), nullptr, 0, + GetInputFormat(i), GetInputShape(i).data(), GetInputNumDims(i), devInputs_[i]); + if (inputTensor == nullptr) { + ERROR_LOG("Create Tensor for input[%zu] failed", i); + return false; + } + inputTensor_.emplace_back(inputTensor); + } + + for (size_t i = 0; i < numOutputs_; ++i) { + auto size = GetOutputSize(i); + void *devMem = nullptr; + if (aclrtMalloc(&devMem, size, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory for output[%zu] failed", i); + return false; + } + devOutputs_.emplace_back(devMem); + outputBuffers_.emplace_back(aclCreateDataBuffer(devMem, size)); + + void *hostOutput = nullptr; + if (g_isDevice) { + if (aclrtMalloc(&hostOutput, size, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory for output[%zu] failed", i); + return false; + } + } else { + if (aclrtMallocHost(&hostOutput, size) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory for output[%zu] failed", i); + return false; + } + } + if (hostOutput == nullptr) { + ERROR_LOG("Malloc host memory for output[%zu] failed", i); + return false; + } + hostOutputs_.emplace_back(hostOutput); + + aclTensor *outputTensor = + aclCreateTensor(GetOutputShape(i).data(), GetOutputNumDims(i), GetOutputDataType(i), nullptr, 0, + GetOutputFormat(i), GetOutputShape(i).data(), GetOutputNumDims(i), devOutputs_[i]); + if (outputTensor == nullptr) { + ERROR_LOG("Create Tensor for output[%zu] failed", i); + return false; + } + 
outputTensor_.emplace_back(outputTensor); + } + + return true; +} + +const size_t OpRunner::NumInputs() +{ + return numInputs_; +} + +const size_t OpRunner::NumOutputs() +{ + return numOutputs_; +} + +const size_t OpRunner::GetInputSize(size_t index) const +{ + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return 0; + } + + return aclGetTensorDescSize(opDesc_->inputDesc[index]); +} + +const size_t OpRunner::GetInputNumDims(size_t index) const +{ + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return 0; + } + + return aclGetTensorDescNumDims(opDesc_->inputDesc[index]); +} + +aclDataType OpRunner::GetInputDataType(size_t index) const +{ + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return ACL_DT_UNDEFINED; + } + + return aclGetTensorDescType(opDesc_->inputDesc[index]); +} + +aclFormat OpRunner::GetInputFormat(size_t index) const +{ + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return ACL_FORMAT_UNDEFINED; + } + + return aclGetTensorDescFormat(opDesc_->inputDesc[index]); +} + +std::vector OpRunner::GetInputShape(size_t index) const +{ + std::vector ret; + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return ret; + } + + auto desc = opDesc_->inputDesc[index]; + for (size_t i = 0; i < aclGetTensorDescNumDims(desc); ++i) { + int64_t dimSize; + if (aclGetTensorDescDimV2(desc, i, &dimSize) != ACL_SUCCESS) { + ERROR_LOG("get dims from tensor desc failed. dims index = %zu", i); + ret.clear(); + return ret; + } + ret.emplace_back(dimSize); + } + + return ret; +} + +size_t OpRunner::GetOutputSize(size_t index) const +{ + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return 0; + } + + return aclGetTensorDescSize(opDesc_->outputDesc[index]); +} + +const size_t OpRunner::GetOutputNumDims(size_t index) const +{ + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return 0; + } + + return aclGetTensorDescNumDims(opDesc_->outputDesc[index]); +} + +aclDataType OpRunner::GetOutputDataType(size_t index) const +{ + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return ACL_DT_UNDEFINED; + } + + return aclGetTensorDescType(opDesc_->outputDesc[index]); +} + +aclFormat OpRunner::GetOutputFormat(size_t index) const +{ + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return ACL_FORMAT_UNDEFINED; + } + + return aclGetTensorDescFormat(opDesc_->outputDesc[index]); +} + +std::vector OpRunner::GetOutputShape(size_t index) const +{ + std::vector ret; + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return ret; + } + + auto desc = opDesc_->outputDesc[index]; + for (size_t i = 0; i < aclGetTensorDescNumDims(desc); ++i) { + int64_t dimSize; + if (aclGetTensorDescDimV2(desc, i, &dimSize) != ACL_SUCCESS) { + ERROR_LOG("get dims from tensor desc failed. 
dims index = %zu", i); + ret.clear(); + return ret; + } + ret.emplace_back(dimSize); + } + return ret; +} + +size_t OpRunner::GetInputElementCount(size_t index) const +{ + if (index >= opDesc_->inputDesc.size()) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return 0; + } + + return aclGetTensorDescElementCount(opDesc_->inputDesc[index]); +} + +size_t OpRunner::GetOutputElementCount(size_t index) const +{ + if (index >= opDesc_->outputDesc.size()) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return 0; + } + + return aclGetTensorDescElementCount(opDesc_->outputDesc[index]); +} + +bool OpRunner::RunOp() +{ + for (size_t i = 0; i < numInputs_; ++i) { + auto size = GetInputSize(i); + aclrtMemcpyKind kind = ACL_MEMCPY_HOST_TO_DEVICE; + if (g_isDevice) { + kind = ACL_MEMCPY_DEVICE_TO_DEVICE; + } + if (aclrtMemcpy(devInputs_[i], size, hostInputs_[i], size, kind) != ACL_SUCCESS) { + ERROR_LOG("Copy input[%zu] failed", i); + return false; + } + INFO_LOG("Copy input[%zu] success", i); + } + + aclrtStream stream = nullptr; + if (aclrtCreateStream(&stream) != ACL_SUCCESS) { + ERROR_LOG("Create stream failed"); + return false; + } + INFO_LOG("Create stream success"); + + size_t workspaceSize = 0; + aclOpExecutor *handle = nullptr; + auto ret = + aclnnScatterCustomGetWorkspaceSize(inputTensor_[0], inputTensor_[1], outputTensor_[0], &workspaceSize, &handle); + if (ret != ACL_SUCCESS) { + (void)aclrtDestroyStream(stream); + ERROR_LOG("Get Operator Workspace failed. error code is %d", static_cast(ret)); + return false; + } + INFO_LOG("Execute aclnnScatterCustomGetWorkspaceSize success, workspace size %lu", workspaceSize); + + if (workspaceSize != 0) { + if (aclrtMalloc(&workspace_, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory failed"); + } + } + + ret = aclnnScatterCustom(workspace_, workspaceSize, handle, stream); + if (ret != ACL_SUCCESS) { + (void)aclrtDestroyStream(stream); + ERROR_LOG("Execute Operator failed. error code is %d", static_cast(ret)); + return false; + } + INFO_LOG("Execute aclnnScatterCustom success"); + + // The unit of 5000 is ms. + ret = aclrtSynchronizeStreamWithTimeout(stream, 5000); + if (ret != SUCCESS) { + ERROR_LOG("Synchronize stream failed. 
error code is %d", static_cast(ret)); + (void)aclrtDestroyStream(stream); + return false; + } + INFO_LOG("Synchronize stream success"); + + for (size_t i = 0; i < numOutputs_; ++i) { + auto size = GetOutputSize(i); + aclrtMemcpyKind kind = ACL_MEMCPY_DEVICE_TO_HOST; + if (g_isDevice) { + kind = ACL_MEMCPY_DEVICE_TO_DEVICE; + } + if (aclrtMemcpy(hostOutputs_[i], size, devOutputs_[i], size, kind) != ACL_SUCCESS) { + INFO_LOG("Copy output[%zu] success", i); + (void)aclrtDestroyStream(stream); + return false; + } + INFO_LOG("Copy output[%zu] success", i); + } + + (void)aclrtDestroyStream(stream); + return true; +} + +template void DoPrintData(const T *data, size_t count, size_t elementsPerRow) +{ + assert(elementsPerRow != 0); + for (size_t i = 0; i < count; ++i) { + std::cout << std::setw(10) << data[i]; + if (i % elementsPerRow == elementsPerRow - 1) { + std::cout << std::endl; + } + } +} + +void DoPrintFp16Data(const aclFloat16 *data, size_t count, size_t elementsPerRow) +{ + assert(elementsPerRow != 0); + for (size_t i = 0; i < count; ++i) { + std::cout << std::setw(10) << std::setprecision(4) << aclFloat16ToFloat(data[i]); + if (i % elementsPerRow == elementsPerRow - 1) { + std::cout << std::endl; + } + } +} + +void PrintData(const void *data, size_t count, aclDataType dataType, size_t elementsPerRow) +{ + if (data == nullptr) { + ERROR_LOG("Print data failed. data is nullptr"); + return; + } + + switch (dataType) { + case ACL_BOOL: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_INT8: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_UINT8: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_INT16: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_UINT16: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_INT32: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_UINT32: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_INT64: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_UINT64: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_FLOAT16: + DoPrintFp16Data(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_FLOAT: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_DOUBLE: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + default: + ERROR_LOG("Unsupported type: %d", dataType); + } +} + +void OpRunner::PrintInput(size_t index, size_t numElementsPerRow) +{ + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numInputs_); + return; + } + + auto desc = opDesc_->inputDesc[index]; + PrintData(hostInputs_[index], GetInputElementCount(index), aclGetTensorDescType(desc), numElementsPerRow); +} + +void OpRunner::PrintOutput(size_t index, size_t numElementsPerRow) +{ + if (index >= numOutputs_) { + ERROR_LOG("index out of range. 
index = %zu, numOutputs = %zu", index, numOutputs_); + return; + } + + auto desc = opDesc_->outputDesc[index]; + PrintData(hostOutputs_[index], GetOutputElementCount(index), aclGetTensorDescType(desc), numElementsPerRow); +} diff --git a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/src/operator_desc.cpp b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/src/operator_desc.cpp new file mode 100644 index 000000000..da04cf6c9 --- /dev/null +++ b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/src/operator_desc.cpp @@ -0,0 +1,51 @@ +/** + * @file operator_desc.cpp + * + * Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#include "operator_desc.h" + +#include "common.h" + +using namespace std; + +OperatorDesc::OperatorDesc() {} + +OperatorDesc::~OperatorDesc() +{ + for (auto *desc : inputDesc) { + aclDestroyTensorDesc(desc); + } + + for (auto *desc : outputDesc) { + aclDestroyTensorDesc(desc); + } +} + +OperatorDesc &OperatorDesc::AddInputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, aclFormat format) +{ + aclTensorDesc *desc = aclCreateTensorDesc(dataType, numDims, dims, format); + if (desc == nullptr) { + ERROR_LOG("create tensor failed"); + return *this; + } + inputDesc.emplace_back(desc); + return *this; +} + +OperatorDesc &OperatorDesc::AddOutputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, + aclFormat format) +{ + aclTensorDesc *desc = aclCreateTensorDesc(dataType, numDims, dims, format); + if (desc == nullptr) { + ERROR_LOG("create tensor failed"); + return *this; + } + + outputDesc.emplace_back(desc); + return *this; +} diff --git a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/README.md b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/README.md new file mode 100644 index 000000000..5438be0a9 --- /dev/null +++ b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/README.md @@ -0,0 +1,109 @@ +## 概述 +本样例基于ScatterCustom算子工程,介绍了msOpGen工具生成简易自定义算子工程和单算子调用。 + +## 目录结构介绍 +``` +├── 28_Scatter_frameworklaunch // 使用框架调用的方式调用Scatter算子 +│ ├── AclNNInvocationNaive // 通过aclnn调用的方式调用ScatterCustom算子, 简化了编译脚本 +│ ├── ScatterCustom // ScatterCustom算子工程 +│ ├── ScatterCustom.json // ScatterCustom算子的原型定义json文件 +│ └── install.sh // 脚本,调用msOpGen生成简易自定义算子工程,并编译 +``` + +## 算子描述 +Scatter算子实现了根据偏移地址生成新的结果张量后将输入张量分散到结果张量 + +## 算子规格描述 + + + + + + + + + + + +
| 算子类型(OpType) | ScatterCustom |  |  |  |
| ---------------- | -------------- | ----- | --------- | ------ |
| 算子输入 | name | shape | data type | format |
| 算子输入 | x | 128 | float16 | ND |
| 算子输入 | y | 128 | uint32 | ND |
| 算子输出 | z | 128 | float16 | ND |
| 核函数名 | scatter_custom |  |  |  |
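
上表的计算语义可以用下面的NumPy参考实现来说明(仅为示意代码,scatter_reference 为本文假设的函数名,不属于算子工程本身):y 中保存的是目标位置的字节偏移,算子行为等价于 dst[y[i] / sizeof(half)] = x[i],与 scripts/gen_data.py 生成的真值一致。

```python
import numpy as np

def scatter_reference(src, dst_offset):
    # dst_offset holds byte offsets into dst; convert them to element indices
    idx = dst_offset // np.dtype(src.dtype).itemsize
    dst = np.zeros_like(src)
    dst[idx] = src  # dst[y[i] / sizeof(T)] = x[i]
    return dst

src = np.arange(128, dtype=np.float16)                 # input x
dst_offset = np.arange(254, -1, -2, dtype=np.uint32)   # byte offsets: 254, 252, ..., 0
golden = scatter_reference(src, dst_offset)            # equals np.arange(127, -1, -1, dtype=np.float16)
```

核函数 Compute 阶段的实现与之对应:zLocal.SetValue(yLocal.GetValue(i) / sizeof(T), xLocal.GetValue(i))。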
+ +## 支持的产品型号 +本样例支持如下产品型号: +- Atlas 训练系列产品 +- Atlas 推理系列产品AI Core +- Atlas A2训练系列产品/Atlas A2推理系列产品 +- Atlas 200/500 A2推理产品 + +## 算子工程介绍 +其中,算子工程目录ScatterCustom包含算子的实现文件,如下所示: +``` +├── ScatterCustom // Scatter自定义算子工程 +│ ├── op_host // host侧实现文件 +│ └── op_kernel // kernel侧实现文件 +``` +CANN软件包中提供了工程创建工具msOpGen,ScatterCustom算子工程可通过ScatterCustom.json自动创建,简易自定义算子工程具体请参考[Ascend C算子开发](https://hiascend.com/document/redirect/CannCommunityOpdevAscendC)>附录>简易自定义算子工程 章节。 + +创建完简易自定义算子工程后,开发者重点需要完成算子工程目录CustomOp下host和kernel的功能开发。为简化样例运行流程,本样例已在ScatterCustom目录准备好了必要的算子实现,install.sh脚本会自动将实现复制到CustomOp对应目录下,再编译算子。 + +## 编译运行样例算子 +针对简易自定义算子工程,编译运行包含如下步骤: +- 调用msOpGen工具生成简易自定义算子工程; +- 完成算子host和kernel实现; +- 编译简易自定义算子工程; +- 调用执行自定义算子; + +详细操作如下所示。 +### 1. 获取源码包 +编译运行此样例前,请参考[准备:获取样例代码](../README.md#codeready)获取源码包。 + +### 2. 生成简易自定义算子工程,复制host和kernel实现并编译算子 + - 切换到msOpGen脚本install.sh所在目录 + ```bash + # 若开发者以git命令行方式clone了master分支代码,并切换目录 + cd ${git_clone_path}/samples/operator/ascendc/0_introduction/28_Scatter_frameworklaunch + ``` + + - 调用脚本,生成简易自定义算子工程,复制host和kernel实现并编译算子 + - 方式一:配置环境变量运行脚本 + 请根据当前环境上CANN开发套件包的[安装方式](https://hiascend.com/document/redirect/CannCommunityInstSoftware),选择对应配置环境变量命令。 + - 默认路径,root用户安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + ``` + - 默认路径,非root用户安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + ``` + - 指定路径install_path,安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=${install_path}/ascend-toolkit/latest + ``` + 运行install.sh脚本 + ```bash + bash install.sh -v [SOC_VERSION] + ``` + - 方式二:指定命令行安装路径来运行脚本 + ```bash + bash install.sh -v [SOC_VERSION] -i [ASCEND_INSTALL_PATH] + ``` + 参数说明: + - SOC_VERSION:昇腾AI处理器型号,如果无法确定具体的[SOC_VERSION],则在安装昇腾AI处理器的服务器执行npu-smi info命令进行查询,在查询到的“Name”前增加Ascend信息,例如“Name”对应取值为xxxyy,实际配置的[SOC_VERSION]值为Ascendxxxyy。支持以下产品型号: + - Atlas 训练系列产品 + - Atlas 推理系列产品AI Core + - Atlas A2训练系列产品/Atlas A2推理系列产品 + - Atlas 200/500 A2推理产品 + - ASCEND_INSTALL_PATH:CANN软件包安装路径 + + 脚本运行成功后,会在当前目录下创建CustomOp目录,编译完成后,会在CustomOp/build_out/op_api/lib目录下生成自定义算子库文件libcust_opapi.so,在CustomOp/build_out/op_api/include目录下生成aclnn接口的头文件。 + + 备注:如果要使用dump调试功能,需要移除op_host内的Atlas 训练系列产品、Atlas 200/500 A2 推理产品的配置项。 + +### 3. 
调用执行算子工程 +- [aclnn调用ScatterCustom算子工程(代码简化)](./AclNNInvocationNaive/README.md) + +## 更新说明 +| 时间 | 更新事项 | +| ---------- | ---------------------------- | +| 2024/10/21 | 初始版本 | +| 2024/11/11 | 样例目录调整 | +| 2024/11/18 | README.md更新 | diff --git a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/ScatterCustom.json b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/ScatterCustom.json new file mode 100644 index 000000000..530e9779d --- /dev/null +++ b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/ScatterCustom.json @@ -0,0 +1,39 @@ +[ + { + "op": "ScatterCustom", + "input_desc": [ + { + "name": "x", + "param_type": "required", + "format": [ + "ND" + ], + "type": [ + "float16" + ] + }, + { + "name": "y", + "param_type": "required", + "format": [ + "ND" + ], + "type": [ + "float16" + ] + } + ], + "output_desc": [ + { + "name": "z", + "param_type": "required", + "format": [ + "ND" + ], + "type": [ + "float16" + ] + } + ] + } +] \ No newline at end of file diff --git a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/ScatterCustom/op_host/scatter_custom.cpp b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/ScatterCustom/op_host/scatter_custom.cpp new file mode 100644 index 000000000..d7eb2f964 --- /dev/null +++ b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/ScatterCustom/op_host/scatter_custom.cpp @@ -0,0 +1,69 @@ +/** + * @file scatter_custom.cpp + * + * Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#include "scatter_custom_tiling.h" +#include "register/op_def_registry.h" + +namespace optiling { +const uint32_t BLOCK_DIM = 1; +static ge::graphStatus TilingFunc(gert::TilingContext *context) +{ + context->SetBlockDim(BLOCK_DIM); + size_t *currentWorkspace = context->GetWorkspaceSizes(1); + currentWorkspace[0] = 0; + return ge::GRAPH_SUCCESS; +} +} // namespace optiling + +namespace ge { +static graphStatus InferShape(gert::InferShapeContext *context) +{ + const gert::Shape *x1_shape = context->GetInputShape(0); + gert::Shape *y_shape = context->GetOutputShape(0); + *y_shape = *x1_shape; + return GRAPH_SUCCESS; +} + +static graphStatus InferDataType(gert::InferDataTypeContext *context) +{ + const auto inputDataType = context->GetInputDataType(0); + context->SetOutputDataType(0, inputDataType); + return ge::GRAPH_SUCCESS; +} +} // namespace ge + +namespace ops { +class ScatterCustom : public OpDef { +public: + explicit ScatterCustom(const char *name) : OpDef(name) + { + this->Input("x") + .ParamType(REQUIRED) + .DataType({ge::DT_FLOAT16}) + .Format({ge::FORMAT_ND}); + this->Input("y") + .ParamType(REQUIRED) + .DataType({ge::DT_UINT32}) + .Format({ge::FORMAT_ND}); + this->Output("z") + .ParamType(REQUIRED) + .DataType({ge::DT_FLOAT16}) + .Format({ge::FORMAT_ND}); + + this->SetInferShape(ge::InferShape).SetInferDataType(ge::InferDataType); + this->AICore() + .SetTiling(optiling::TilingFunc) + .AddConfig("ascend910") + .AddConfig("ascend310p") + .AddConfig("ascend310b") + .AddConfig("ascend910b"); + } +}; +OP_ADD(ScatterCustom); +} // namespace ops diff --git a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/ScatterCustom/op_host/scatter_custom_tiling.h b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/ScatterCustom/op_host/scatter_custom_tiling.h new file mode 100644 index 
000000000..38a50322c --- /dev/null +++ b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/ScatterCustom/op_host/scatter_custom_tiling.h @@ -0,0 +1,22 @@ +/** + * @file Scatter_custom_tiling.h + * + * Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#ifndef SCATTER_CUSTOM_TILING_H +#define SCATTER_CUSTOM_TILING_H +#include "register/tilingdata_base.h" + +namespace optiling { +BEGIN_TILING_DATA_DEF(TilingData) +TILING_DATA_FIELD_DEF(uint32_t, totalLength); +TILING_DATA_FIELD_DEF(uint32_t, tileNum); +END_TILING_DATA_DEF; + +REGISTER_TILING_DATA_CLASS(ScatterCustom, TilingData) +} // namespace optiling +#endif // ADD_CUSTOM_TILING_H diff --git a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/ScatterCustom/op_kernel/scatter_custom.cpp b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/ScatterCustom/op_kernel/scatter_custom.cpp new file mode 100644 index 000000000..80052b344 --- /dev/null +++ b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/ScatterCustom/op_kernel/scatter_custom.cpp @@ -0,0 +1,92 @@ +/** + * @file scatter_custom.cpp + * + * Copyright (C) 2022-2024. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#include "kernel_operator.h" +constexpr int32_t BUFFER_NUM = 1; // tensor num for each queue + +template +class ScatterTest { +public: + __aicore__ inline ScatterTest() {} + __aicore__ inline void Init(GM_ADDR srcGm, GM_ADDR dstOffsetGm, GM_ADDR dstGm, uint32_t count) + { + mElementCount = count; + xGm.SetGlobalBuffer((__gm__ DTYPE_X *)srcGm, mElementCount); + yGm.SetGlobalBuffer((__gm__ DTYPE_Y *)dstOffsetGm, mElementCount); + zGm.SetGlobalBuffer((__gm__ DTYPE_Z *)dstGm, mElementCount); + pipe.InitBuffer(inQueueX, BUFFER_NUM, mElementCount * sizeof(DTYPE_X)); + pipe.InitBuffer(inQueueY, BUFFER_NUM, mElementCount * sizeof(DTYPE_Y)); + pipe.InitBuffer(outQueueZ, BUFFER_NUM, mElementCount * sizeof(DTYPE_Z)); + } + __aicore__ inline void Process() + { + CopyIn(); + Compute(); + CopyOut(); + } + +private: + __aicore__ inline void CopyIn() + { + AscendC::LocalTensor xLocal = inQueueX.AllocTensor(); + AscendC::LocalTensor yLocal = inQueueY.AllocTensor(); + AscendC::DataCopy(xLocal, xGm, mElementCount); + AscendC::DataCopy(yLocal, yGm, mElementCount); + inQueueX.EnQue(xLocal); + inQueueY.EnQue(yLocal); + } + __aicore__ inline void Compute() + { + AscendC::LocalTensor xLocal = inQueueX.DeQue(); + AscendC::LocalTensor yLocal = inQueueY.DeQue(); + AscendC::LocalTensor zLocal = outQueueZ.AllocTensor(); + for (uint32_t i = 0; i < mElementCount; ++i) { + auto offset = yLocal.GetValue(i) / sizeof(T); + auto srcValue = xLocal.GetValue(i); + zLocal.SetValue(offset, srcValue); + } + outQueueZ.EnQue(zLocal); + inQueueX.FreeTensor(xLocal); + inQueueY.FreeTensor(yLocal); + } + __aicore__ inline void CopyOut() + { + AscendC::LocalTensor zLocal = outQueueZ.DeQue(); + AscendC::DataCopy(zGm, zLocal, mElementCount); + outQueueZ.FreeTensor(zLocal); + } + +private: + AscendC::TPipe pipe; + AscendC::TQue inQueueX, inQueueY; + AscendC::TQue outQueueZ; + AscendC::GlobalTensor xGm; + AscendC::GlobalTensor yGm; + AscendC::GlobalTensor zGm; + uint32_t 
mElementCount; +}; + +#define KERNEL_SCATTER(T, count) \ + extern "C" __global__ __aicore__ void scatter_custom(GM_ADDR srcGm, GM_ADDR dstOffsetGm, \ + GM_ADDR dstGm, GM_ADDR workspace, GM_ADDR tiling) \ + { \ + ScatterTest op; \ + op.Init(srcGm, dstOffsetGm, dstGm, count); \ + op.Process(); \ + } + +KERNEL_SCATTER(half, 128); +#ifndef ASCENDC_CPU_DEBUG +// call of kernel function +void Scatter_custom_do(uint32_t blockDim, void *l2ctrl, void *stream, uint8_t *srcGm, uint8_t *dstOffsetGm, uint8_t *dstGm, + uint8_t *workspace, uint8_t *tiling) +{ + scatter_custom<<>>(srcGm, dstOffsetGm, dstGm, workspace, tiling); +} +#endif \ No newline at end of file diff --git a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/install.sh b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/install.sh new file mode 100644 index 000000000..bf2ba6ec4 --- /dev/null +++ b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/install.sh @@ -0,0 +1,54 @@ +#!/bin/bash +SHORT=v:,i:, +LONG=soc-version:,install-path:, +OPTS=$(getopt -a --options $SHORT --longoptions $LONG -- "$@") +eval set -- "$OPTS" + +while :; do + case "$1" in + -v | --soc-version) + SOC_VERSION="$2" + shift 2 + ;; + -i | --install-path) + ASCEND_INSTALL_PATH="$2" + shift 2 + ;; + --) + shift + break + ;; + *) + echo "[ERROR]: Unexpected option: $1" + break + ;; + esac +done + +VERSION_LIST="Ascend910A Ascend910B Ascend310B1 Ascend310B2 Ascend310B3 Ascend310B4 Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" +if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" + exit -1 +fi + +if [ -n "$ASCEND_INSTALL_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH +elif [ -n "$ASCEND_HOME_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH +else + if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then + _ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + else + _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + fi +fi +source $_ASCEND_INSTALL_PATH/bin/setenv.bash +export ASCEND_HOME_PATH=$_ASCEND_INSTALL_PATH + +rm -rf CustomOp +# Generate the op framework +msopgen gen -i ScatterCustom.json -f aclnn -c ai_core-${SOC_VERSION} -lan cpp -out CustomOp +# Copy ScatterCustom op implementation files to CustomOp +cp -rf ScatterCustom/* CustomOp +# build CustomOp, compile ScatterCustom op +(cd CustomOp && bash build.sh) \ No newline at end of file diff --git a/operator/ascendc/0_introduction/README.md b/operator/ascendc/0_introduction/README.md index 0b9383e71..3209760d6 100644 --- a/operator/ascendc/0_introduction/README.md +++ b/operator/ascendc/0_introduction/README.md @@ -42,6 +42,7 @@ | [25_simple_add](./25_simple_add) | Ascend C异构混合编程样例, 实现Add自定义Vector算子及调用, 支持host/device代码混合编程 | Atlas A2训练系列产品/Atlas 800I A2推理产品 | [26_simple_matmulleakyrelu](./26_simple_matmulleakyrelu) | Ascend C异构混合编程样例, 实现MatmulLeakyRelu自定义Cube+Vector算子及调用, 支持host/device代码混合编程 | Atlas A2训练系列产品/Atlas 800I A2推理产品 | [27_simple_add_cpp_extensions](./27_simple_add_cpp_extensions) | Ascend C异构混合编程样例, 实现Add自定义Vector算子动态库及pybind调用, 支持host/device代码混合编程 | Atlas A2训练系列产品/Atlas 800I A2推理产品 +| [28_Scatter_frameworklaunch](./28_Scatter_frameworklaunch) | 基于Ascend C的Scatter算子及FrameworkLaunch调用样例 | Atlas 训练系列产品
Atlas 推理系列产品AI Core
Atlas A2训练系列产品/Atlas A2推理产品| ## 获取样例代码 可以使用以下两种方式下载,请选择其中一种进行源码准备。 -- Gitee From 330b0913be01c9b81eacaf4a1f5910bd3fdc787e Mon Sep 17 00:00:00 2001 From: Zql99 Date: Mon, 17 Nov 2025 19:49:20 +0800 Subject: [PATCH 2/3] add scatter operator --- .../AclNNInvocation/README.md | 55 --- .../AclNNInvocation/inc/common.h | 45 -- .../AclNNInvocation/inc/op_runner.h | 181 ------- .../AclNNInvocation/inc/operator_desc.h | 57 --- .../AclNNInvocation/input/.keep | 0 .../AclNNInvocation/run.sh | 75 --- .../AclNNInvocation/scripts/acl.json | 1 - .../AclNNInvocation/scripts/gen_data.py | 24 - .../AclNNInvocation/src/CMakeLists.txt | 65 --- .../AclNNInvocation/src/common.cpp | 80 --- .../AclNNInvocation/src/main.cpp | 163 ------- .../AclNNInvocation/src/op_runner.cpp | 457 ------------------ .../AclNNInvocation/src/operator_desc.cpp | 51 -- .../28_scatter_frameworklaunch/README.md | 109 ----- .../ScatterCustom.json | 39 -- .../ScatterCustom/op_host/scatter_custom.cpp | 69 --- .../op_host/scatter_custom_tiling.h | 22 - .../28_scatter_frameworklaunch/install.sh | 54 --- .../ScatterKernelInvocationAcl/CMakeLists.txt | 44 ++ .../ScatterKernelInvocationAcl/README.md | 72 +++ .../cmake/cpu_lib.cmake | 9 + .../cmake/npu_lib.cmake | 10 + .../ScatterKernelInvocationAcl/data_utils.h | 242 ++++++++++ .../ScatterKernelInvocationAcl/main.cpp | 100 ++++ .../ScatterKernelInvocationAcl/run.sh | 122 +++++ .../scatter_custom.cpp | 77 ++- .../scripts/gen_data.py | 27 ++ .../scripts/verify_result.py | 18 +- operator/ascendc/0_introduction/README.md | 2 +- 29 files changed, 672 insertions(+), 1598 deletions(-) delete mode 100644 operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/README.md delete mode 100644 operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/inc/common.h delete mode 100644 operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/inc/op_runner.h delete mode 100644 operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/inc/operator_desc.h delete mode 100644 operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/input/.keep delete mode 100644 operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/run.sh delete mode 100644 operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/scripts/acl.json delete mode 100644 operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/scripts/gen_data.py delete mode 100644 operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/src/CMakeLists.txt delete mode 100644 operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/src/common.cpp delete mode 100644 operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/src/main.cpp delete mode 100644 operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/src/op_runner.cpp delete mode 100644 operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/src/operator_desc.cpp delete mode 100644 operator/ascendc/0_introduction/28_scatter_frameworklaunch/README.md delete mode 100644 operator/ascendc/0_introduction/28_scatter_frameworklaunch/ScatterCustom.json delete mode 100644 operator/ascendc/0_introduction/28_scatter_frameworklaunch/ScatterCustom/op_host/scatter_custom.cpp delete mode 100644 operator/ascendc/0_introduction/28_scatter_frameworklaunch/ScatterCustom/op_host/scatter_custom_tiling.h delete mode 100644 operator/ascendc/0_introduction/28_scatter_frameworklaunch/install.sh create mode 
100644 operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/CMakeLists.txt create mode 100644 operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/README.md create mode 100644 operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/cmake/cpu_lib.cmake create mode 100644 operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/cmake/npu_lib.cmake create mode 100644 operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/data_utils.h create mode 100644 operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/main.cpp create mode 100644 operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/run.sh rename operator/ascendc/0_introduction/{28_scatter_frameworklaunch/ScatterCustom/op_kernel => 28_scatter_kernellaunch/ScatterKernelInvocationAcl}/scatter_custom.cpp (46%) create mode 100644 operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/scripts/gen_data.py rename operator/ascendc/0_introduction/{28_scatter_frameworklaunch/AclNNInvocation => 28_scatter_kernellaunch/ScatterKernelInvocationAcl}/scripts/verify_result.py (61%) diff --git a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/README.md b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/README.md deleted file mode 100644 index 7ec773047..000000000 --- a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/README.md +++ /dev/null @@ -1,55 +0,0 @@ -## 目录结构介绍 -``` -├── AclNNInvocation //通过aclnn调用的方式调用ScatterCustom算子 -│ ├── inc // 头文件目录 -│ │ ├── common.h // 声明公共方法类,用于读取二进制文件 -│ │ ├── op_runner.h // 算子描述声明文件,包含算子输入/输出,算子类型以及输入描述与输出描述 -│ │ └── operator_desc.h // 算子运行相关信息声明文件,包含算子输入/输出个数,输入/输出大小等 -│ ├── input // 存放脚本生成的输入数据目录 -│ ├── output // 存放算子运行输出数据和真值数据的目录 -│ ├── scripts -│ │ ├── acl.json // acl配置文件 -│ │ ├── gen_data.py // 输入数据和真值数据生成脚本 -│ │ └── verify_result.py // 真值对比文件 -│ ├── src -│ │ ├── CMakeLists.txt // 编译规则文件 -│ │ ├── common.cpp // 公共函数,读取二进制文件函数的实现文件 -│ │ ├── main.cpp // 单算子调用应用的入口 -│ │ ├── op_runner.cpp // 单算子调用主体流程实现文件 -│ │ └── operator_desc.cpp // 构造算子的输入与输出描述 -│ └── run.sh // 执行命令脚本 -``` -## 代码实现介绍 -完成自定义算子的开发部署后,可以通过单算子调用的方式来验证单算子的功能。src/main.cpp代码为单算子API执行方式。单算子API执行是基于C语言的API执行算子,无需提供单算子描述文件进行离线模型的转换,直接调用单算子API接口。 - -自定义算子编译部署后,会自动生成单算子API,可以直接在应用程序中调用。算子API的形式一般定义为“两段式接口”,形如: - ```cpp - // 获取算子使用的workspace空间大小 - aclnnStatus aclnnScatterCustomGetWorkspaceSize(const aclTensor *x, const aclTensor *y, const aclTensor *out, uint64_t *workspaceSize, aclOpExecutor **executor); - // 执行算子 - aclnnStatus aclnnScatterCustom(void *workspace, int64_t workspaceSize, aclOpExecutor *executor, aclrtStream stream); - ``` -其中aclnnScatterCustomGetWorkspaceSize为第一段接口,主要用于计算本次API调用计算过程中需要多少的workspace内存。获取到本次API计算需要的workspace大小之后,开发者可以按照workspaceSize大小申请Device侧内存,然后调用第二段接口aclnnScatterCustom执行计算。具体参考[单算子API调用](https://hiascend.com/document/redirect/CannCommunityAscendCInVorkSingleOp)章节。 - -## 运行样例算子 -### 1. 编译算子工程 -运行此样例前,请参考[编译算子工程](../README.md#operatorcompile)完成前期准备。 -### 2. 
aclnn调用样例运行 - - - 进入到样例目录 - 以命令行方式下载样例代码,master分支为例。 - ```bash - cd ${git_clone_path}/samples/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation - ``` - - 样例执行 - - 样例执行过程中会自动生成测试数据,然后编译与运行aclnn样例,最后检验运行结果。具体过程可参见run.sh脚本。 - - ```bash - bash run.sh - ``` -## 更新说明 -| 时间 | 更新事项 | -| ---------- | ------------ | -| 2025/11/12 | 新增本readme | -| 2025/11/12 | 样例目录调整 | \ No newline at end of file diff --git a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/inc/common.h b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/inc/common.h deleted file mode 100644 index 11bb4aeca..000000000 --- a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/inc/common.h +++ /dev/null @@ -1,45 +0,0 @@ -/** - * @file common.h - * - * Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - */ -#ifndef COMMON_H -#define COMMON_H - -#include -#include -#include -#include -#include - -#include "acl/acl.h" - -#define SUCCESS 0 -#define FAILED 1 - -#define INFO_LOG(fmt, args...) fprintf(stdout, "[INFO] " fmt "\n", ##args) -#define WARN_LOG(fmt, args...) fprintf(stdout, "[WARN] " fmt "\n", ##args) -#define ERROR_LOG(fmt, args...) fprintf(stderr, "[ERROR] " fmt "\n", ##args) - -/** - * @brief Read data from file - * @param [in] filePath: file path - * @param [out] fileSize: file size - * @return read result - */ -bool ReadFile(const std::string &filePath, size_t fileSize, void *buffer, size_t bufferSize); - -/** - * @brief Write data to file - * @param [in] filePath: file path - * @param [in] buffer: data to write to file - * @param [in] size: size to write - * @return write result - */ -bool WriteFile(const std::string &filePath, const void *buffer, size_t size); - -#endif // COMMON_H diff --git a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/inc/op_runner.h b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/inc/op_runner.h deleted file mode 100644 index f1b3a6706..000000000 --- a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/inc/op_runner.h +++ /dev/null @@ -1,181 +0,0 @@ -/** - * @file op_runner.h - * - * Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
- */ -#ifndef OP_RUNNER_H -#define OP_RUNNER_H - -#include "acl/acl.h" -#include "aclnn/acl_meta.h" -#include "common.h" -#include "operator_desc.h" - -/** - * Op Runner - */ -class OpRunner { -public: - /** - * @brief Constructor - * @param [in] opDesc: op description - */ - explicit OpRunner(OperatorDesc *opDesc); - - /** - * @brief Destructor - */ - virtual ~OpRunner(); - - /** - * @brief Init op runner - */ - bool Init(); - - /** - * @brief Get number of inputs - * @return number of inputs - */ - const size_t NumInputs(); - - /** - * @brief Get number of outputs - * @return number of outputs - */ - const size_t NumOutputs(); - - /** - * @brief Get input size by index - * @param [in] index: input index - * @return size of the input - */ - const size_t GetInputSize(size_t index) const; - const size_t GetInputNumDims(size_t index) const; - aclDataType GetInputDataType(size_t index) const; - aclFormat GetInputFormat(size_t index) const; - - /** - * @brief Get output size by index - * @param [in] index: output index - * @return size of the output - */ - size_t GetOutputSize(size_t index) const; - const size_t GetOutputNumDims(size_t index) const; - aclDataType GetOutputDataType(size_t index) const; - aclFormat GetOutputFormat(size_t index) const; - - /** - * @brief Get input element count by index - * @param i[in] ndex: input index - * @return element count of the input - */ - size_t GetInputElementCount(size_t index) const; - - /** - * @brief Get output element count by index - * @param [in] index: output index - * @return element count of the output - */ - size_t GetOutputElementCount(size_t index) const; - - /** - * @brief Get input shape by index - * @param [in] index: input index - * @return shape of the output - */ - std::vector GetInputShape(size_t index) const; - - /** - * @brief Get output shape by index - * @param [in] index: output index - * @return shape of the output - */ - std::vector GetOutputShape(size_t index) const; - - /** - * @brief Get input buffer(host memory) by index - * @tparam T: data type - * @param [in] index: input index - * @return host address of the input - */ - template T *GetInputBuffer(size_t index) - { - if (index >= numInputs_) { - ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); - return nullptr; - } - return reinterpret_cast(hostInputs_[index]); - } - - /** - * @brief Get output buffer(host memory) by index - * @tparam T: data type - * @param [in] index: output index - * @return host address of the output - */ - template const T *GetOutputBuffer(size_t index) - { - if (index >= numOutputs_) { - ERROR_LOG("index out of range. 
index = %zu, numOutputs = %zu", index, numOutputs_); - return nullptr; - } - - return reinterpret_cast(hostOutputs_[index]); - } - - /** - * @brief Print readable input by index - * @param [in] index: input index - * @param [in] elementsPerRow: number of elements per row - */ - void PrintInput(size_t index, size_t elementsPerRow = 16); - - /** - * @brief Print readable output by index - * @param [in] index: output index - * @param [in] elementsPerRow: number of elements per row - */ - void PrintOutput(size_t index, size_t elementsPerRow = 16); - - /** - * @brief Compile static op - * @return compile result - */ - bool CompileStaticOp(); - - /** - * @brief Compile dynamic op - * @return compile result - */ - bool CompileDynamicOp(); - - /** - * @brief Run op - * @return run result - */ - bool RunOp(); - -private: - size_t numInputs_; - size_t numOutputs_; - void *workspace_; - - std::vector inputBuffers_; - std::vector outputBuffers_; - - std::vector devInputs_; - std::vector devOutputs_; - - std::vector hostInputs_; - std::vector hostOutputs_; - - std::vector inputTensor_; - std::vector outputTensor_; - OperatorDesc *opDesc_; -}; - -#endif // OP_RUNNER_H diff --git a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/inc/operator_desc.h b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/inc/operator_desc.h deleted file mode 100644 index 6d8ee0905..000000000 --- a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/inc/operator_desc.h +++ /dev/null @@ -1,57 +0,0 @@ -/** - * @file operator_desc.h - * - * Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
- */ -#ifndef OPERATOR_DESC_H -#define OPERATOR_DESC_H - -#include -#include - -#include "acl/acl.h" - -/** - * Op description - */ -struct OperatorDesc { - /** - * Constructor - */ - explicit OperatorDesc(); - - /** - * Destructor - */ - virtual ~OperatorDesc(); - - /** - * Add an input tensor description - * @param [in] dataType: data type - * @param [in] numDims: number of dims - * @param [in] dims: dims - * @param [in] format: format - * @return OperatorDesc - */ - OperatorDesc &AddInputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, aclFormat format); - - /** - * Add an output tensor description - * @param [in] dataType: data type - * @param [in] numDims: number of dims - * @param [in] dims: dims - * @param [in] format: format - * @return OperatorDesc - */ - OperatorDesc &AddOutputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, aclFormat format); - - std::string opType; - std::vector inputDesc; - std::vector outputDesc; -}; - -#endif // OPERATOR_DESC_H diff --git a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/input/.keep b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/input/.keep deleted file mode 100644 index e69de29bb..000000000 diff --git a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/run.sh b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/run.sh deleted file mode 100644 index 215f48cee..000000000 --- a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/run.sh +++ /dev/null @@ -1,75 +0,0 @@ -#!/bin/bash -CURRENT_DIR=$( - cd $(dirname ${BASH_SOURCE:-$0}) - pwd -) - -if [ -n "$ASCEND_INSTALL_PATH" ]; then - _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH -elif [ -n "$ASCEND_HOME_PATH" ]; then - _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH -else - if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then - _ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest - else - _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest - fi -fi -source $_ASCEND_INSTALL_PATH/bin/setenv.bash -export DDK_PATH=$_ASCEND_INSTALL_PATH -export NPU_HOST_LIB=$_ASCEND_INSTALL_PATH/$(arch)-$(uname -s | tr '[:upper:]' '[:lower:]')/devlib - -function main { - # 1. 清除遗留生成文件和日志文件 - rm -rf $HOME/ascend/log/* - rm ./input/*.bin - rm ./output/*.bin - - # 2. 生成输入数据和真值数据 - cd $CURRENT_DIR - python3 scripts/gen_data.py - if [ $? -ne 0 ]; then - echo "[ERROR]: Generate input data failed!" - return 1 - fi - echo "[INFO]: Generate input data success!" - - # 3. 编译可执行文件 - cd $CURRENT_DIR - rm -rf build - mkdir -p build - cd build - cmake ../src -DCMAKE_SKIP_RPATH=TRUE - if [ $? -ne 0 ]; then - echo "[ERROR]: Cmake failed!" - return 1 - fi - echo "[INFO]: Cmake success!" - make - if [ $? -ne 0 ]; then - echo "[ERROR]: Make failed!" - return 1 - fi - echo "[INFO]: Make success!" - - # 4. 运行可执行文件 - export LD_LIBRARY_PATH=$_ASCEND_INSTALL_PATH/opp/vendors/customize/op_api/lib:$LD_LIBRARY_PATH - cd $CURRENT_DIR/output - echo "[INFO]: Execute op!" - ./execute_add_op - if [ $? -ne 0 ]; then - echo "[ERROR]: Acl executable run failed! please check your project!" - return 1 - fi - echo "[INFO]: Acl executable run success!" - - # 5. 精度比对 - cd $CURRENT_DIR - python3 scripts/verify_result.py output/output_dst.bin output/dst.bin - if [ $? -ne 0 ]; then - echo "[ERROR]: Verify result failed!" 
- return 1 - fi -} - -main diff --git a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/scripts/acl.json b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/scripts/acl.json deleted file mode 100644 index 9e26dfeeb..000000000 --- a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/scripts/acl.json +++ /dev/null @@ -1 +0,0 @@ -{} \ No newline at end of file diff --git a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/scripts/gen_data.py b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/scripts/gen_data.py deleted file mode 100644 index 6483833c3..000000000 --- a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/scripts/gen_data.py +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/python3 -# coding=utf-8 -# -# Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# =============================================================================== - -import numpy as np - - -def gen_golden_data_simple(): - src = np.arange(128, dtype = np.float16) - dst_Offset = np.arange(254, -1, -2, dtype = np.uint32) - dst = np.arange(127, -1, -1, dtype = np.float16) - src.tofile("./input/src.bin") - dst_Offset.tofile("./input/dst_Offset.bin") - dst.tofile("./output/dst.bin") - - -if __name__ == "__main__": - gen_golden_data_simple() diff --git a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/src/CMakeLists.txt b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/src/CMakeLists.txt deleted file mode 100644 index 8e9e45375..000000000 --- a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/src/CMakeLists.txt +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2020. All rights reserved. 
- -# CMake lowest version requirement -cmake_minimum_required(VERSION 3.5.1) - -# project information -project(acl_execute_add) - -# Compile options -add_compile_options(-std=c++11) - -set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "../output") -set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "../output") - -set(INC_PATH $ENV{DDK_PATH}) - -if (NOT DEFINED ENV{DDK_PATH}) - set(INC_PATH "/usr/local/Ascend/ascend-toolkit/latest") - message(STATUS "set default INC_PATH: ${INC_PATH}") -else () - message(STATUS "env INC_PATH: ${INC_PATH}") -endif() - -set(CUST_PKG_PATH "${INC_PATH}/opp/vendors/customize/op_api") - -set(LIB_PATH $ENV{NPU_HOST_LIB}) - -# Dynamic libraries in the stub directory can only be used for compilation -if (NOT DEFINED ENV{NPU_HOST_LIB}) - string(TOLOWER "${CMAKE_SYSTEM_NAME}" SYSTEM_NAME_LOWER) - set(LIB_PATH "/usr/local/Ascend/ascend-toolkit/latest/${CMAKE_SYSTEM_PROCESSOR}-${SYSTEM_NAME_LOWER}/devlib") - message(STATUS "set default LIB_PATH: ${LIB_PATH}") -else () - message(STATUS "env LIB_PATH: ${LIB_PATH}") -endif() - -# Header path -include_directories( - ../inc - ${INC_PATH}/include - ${CUST_PKG_PATH}/include -) - -# add host lib path -link_directories( - ${LIB_PATH} - ${CUST_PKG_PATH}/lib -) - -add_executable(execute_add_op - operator_desc.cpp - op_runner.cpp - main.cpp - common.cpp -) - -target_link_libraries(execute_add_op - ascendcl - cust_opapi - acl_op_compiler - nnopbase - stdc++ -) - -install(TARGETS execute_add_op DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) diff --git a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/src/common.cpp b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/src/common.cpp deleted file mode 100644 index 992759c95..000000000 --- a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/src/common.cpp +++ /dev/null @@ -1,80 +0,0 @@ -/** - * @file common.cpp - * - * Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - */ -#include "common.h" - -#include -#include -#include - -#include - -extern bool g_isDevice; - -bool ReadFile(const std::string &filePath, size_t fileSize, void *buffer, size_t bufferSize) -{ - struct stat sBuf; - int fileStatus = stat(filePath.data(), &sBuf); - if (fileStatus == -1) { - ERROR_LOG("failed to get file %s", filePath.c_str()); - return false; - } - if (S_ISREG(sBuf.st_mode) == 0) { - ERROR_LOG("%s is not a file, please enter a file", filePath.c_str()); - return false; - } - - std::ifstream file; - file.open(filePath, std::ios::binary); - if (!file.is_open()) { - ERROR_LOG("Open file failed. path = %s", filePath.c_str()); - return false; - } - - std::filebuf *buf = file.rdbuf(); - size_t size = buf->pubseekoff(0, std::ios::end, std::ios::in); - if (size == 0) { - ERROR_LOG("file size is 0"); - file.close(); - return false; - } - if (size > bufferSize) { - ERROR_LOG("file size is larger than buffer size"); - file.close(); - return false; - } - buf->pubseekpos(0, std::ios::in); - buf->sgetn(static_cast(buffer), size); - fileSize = size; - file.close(); - return true; -} - -bool WriteFile(const std::string &filePath, const void *buffer, size_t size) -{ - if (buffer == nullptr) { - ERROR_LOG("Write file failed. 
buffer is nullptr"); - return false; - } - - int fd = open(filePath.c_str(), O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWRITE); - if (fd < 0) { - ERROR_LOG("Open file failed. path = %s", filePath.c_str()); - return false; - } - - size_t writeSize = write(fd, buffer, size); - (void)close(fd); - if (writeSize != size) { - ERROR_LOG("Write file Failed."); - return false; - } - - return true; -} diff --git a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/src/main.cpp b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/src/main.cpp deleted file mode 100644 index e864046b6..000000000 --- a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/src/main.cpp +++ /dev/null @@ -1,163 +0,0 @@ -/** - * @file main.cpp - * - * Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - */ -#include -#include -#include - -#include -#include - -#include "acl/acl.h" -#include "common.h" -#include "op_runner.h" - -bool g_isDevice = false; -int deviceId = 0; - -OperatorDesc CreateOpDesc() -{ - // define operator - std::vector shape{128}; - aclDataType dataType = ACL_FLOAT16; - aclDataType dataType1 = ACL_UINT32; - aclFormat format = ACL_FORMAT_ND; - OperatorDesc opDesc; - opDesc.AddInputTensorDesc(dataType, shape.size(), shape.data(), format); - opDesc.AddInputTensorDesc(dataType1, shape.size(), shape.data(), format); - opDesc.AddOutputTensorDesc(dataType, shape.size(), shape.data(), format); - return opDesc; -} - -bool SetInputData(OpRunner &runner) -{ - size_t fileSize = 0; - ReadFile("../input/src.bin", fileSize, runner.GetInputBuffer(0), runner.GetInputSize(0)); - ReadFile("../input/dst_Offset.bin", fileSize, runner.GetInputBuffer(1), runner.GetInputSize(1)); - INFO_LOG("Set input success"); - return true; -} - -bool ProcessOutputData(OpRunner &runner) -{ - WriteFile("../output/output_dst.bin", runner.GetOutputBuffer(0), runner.GetOutputSize(0)); - INFO_LOG("Write output success"); - return true; -} - -void DestroyResource() -{ - bool flag = false; - if (aclrtResetDevice(deviceId) != ACL_SUCCESS) { - ERROR_LOG("Reset device %d failed", deviceId); - flag = true; - } - INFO_LOG("Reset Device success"); - if (aclFinalize() != ACL_SUCCESS) { - ERROR_LOG("Finalize acl failed"); - flag = true; - } - if (flag) { - ERROR_LOG("Destroy resource failed"); - } else { - INFO_LOG("Destroy resource success"); - } -} - -bool InitResource() -{ - std::string output = "../output"; - if (access(output.c_str(), 0) == -1) { - int ret = mkdir(output.c_str(), 0700); - if (ret == 0) { - INFO_LOG("Make output directory successfully"); - } else { - ERROR_LOG("Make output directory fail"); - return false; - } - } - - // acl.json is dump or profiling config file - if (aclInit("../scripts/acl.json") != ACL_SUCCESS) { - ERROR_LOG("acl init failed"); - return false; - } - - if (aclrtSetDevice(deviceId) != ACL_SUCCESS) { - ERROR_LOG("Set device failed. 
deviceId is %d", deviceId); - (void)aclFinalize(); - return false; - } - INFO_LOG("Set device[%d] success", deviceId); - - // runMode is ACL_HOST which represents app is running in host - // runMode is ACL_DEVICE which represents app is running in device - aclrtRunMode runMode; - if (aclrtGetRunMode(&runMode) != ACL_SUCCESS) { - ERROR_LOG("Get run mode failed"); - DestroyResource(); - return false; - } - g_isDevice = (runMode == ACL_DEVICE); - INFO_LOG("Get RunMode[%d] success", runMode); - - return true; -} - -bool RunOp() -{ - // create op desc - OperatorDesc opDesc = CreateOpDesc(); - - // create Runner - OpRunner opRunner(&opDesc); - if (!opRunner.Init()) { - ERROR_LOG("Init OpRunner failed"); - return false; - } - - // Load inputs - if (!SetInputData(opRunner)) { - ERROR_LOG("Set input data failed"); - return false; - } - - // Run op - if (!opRunner.RunOp()) { - ERROR_LOG("Run op failed"); - return false; - } - - // process output data - if (!ProcessOutputData(opRunner)) { - ERROR_LOG("Process output data failed"); - return false; - } - - INFO_LOG("Run op success"); - return true; -} - -int main(int argc, char **argv) -{ - if (!InitResource()) { - ERROR_LOG("Init resource failed"); - return FAILED; - } - INFO_LOG("Init resource success"); - - if (!RunOp()) { - DestroyResource(); - return FAILED; - } - - DestroyResource(); - - return SUCCESS; -} diff --git a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/src/op_runner.cpp b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/src/op_runner.cpp deleted file mode 100644 index 5166558dd..000000000 --- a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/src/op_runner.cpp +++ /dev/null @@ -1,457 +0,0 @@ -/** - * @file op_runner.cpp - * - * Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
- */ -#include "op_runner.h" - -#include -#include - -#include "acl/acl_op_compiler.h" -#include "aclnn_scatter_custom.h" -#include "common.h" - -using namespace std; - -extern bool g_isDevice; - -OpRunner::OpRunner(OperatorDesc *opDesc) : opDesc_(opDesc) -{ - numInputs_ = opDesc->inputDesc.size(); - numOutputs_ = opDesc->outputDesc.size(); - workspace_ = nullptr; -} - -OpRunner::~OpRunner() -{ - if (workspace_ != nullptr) { - (void)aclrtFree(workspace_); - } - for (size_t i = 0; i < numInputs_; ++i) { - (void)aclDestroyTensor(inputTensor_[i]); - (void)aclDestroyDataBuffer(inputBuffers_[i]); - (void)aclrtFree(devInputs_[i]); - if (g_isDevice) { - (void)aclrtFree(hostInputs_[i]); - } else { - (void)aclrtFreeHost(hostInputs_[i]); - } - } - - for (size_t i = 0; i < numOutputs_; ++i) { - (void)aclDestroyTensor(outputTensor_[i]); - (void)aclDestroyDataBuffer(outputBuffers_[i]); - (void)aclrtFree(devOutputs_[i]); - if (g_isDevice) { - (void)aclrtFree(hostOutputs_[i]); - } else { - (void)aclrtFreeHost(hostOutputs_[i]); - } - } -} - -bool OpRunner::Init() -{ - for (size_t i = 0; i < numInputs_; ++i) { - auto size = GetInputSize(i); - void *devMem = nullptr; - if (aclrtMalloc(&devMem, size, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) { - ERROR_LOG("Malloc device memory for input[%zu] failed", i); - return false; - } - devInputs_.emplace_back(devMem); - inputBuffers_.emplace_back(aclCreateDataBuffer(devMem, size)); - - void *hostInput = nullptr; - if (g_isDevice) { - if (aclrtMalloc(&hostInput, size, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) { - ERROR_LOG("Malloc device memory for input[%zu] failed", i); - return false; - } - } else { - if (aclrtMallocHost(&hostInput, size) != ACL_SUCCESS) { - ERROR_LOG("Malloc device memory for input[%zu] failed", i); - return false; - } - } - if (hostInput == nullptr) { - ERROR_LOG("Malloc memory for input[%zu] failed", i); - return false; - } - hostInputs_.emplace_back(hostInput); - - aclTensor *inputTensor = - aclCreateTensor(GetInputShape(i).data(), GetInputNumDims(i), GetInputDataType(i), nullptr, 0, - GetInputFormat(i), GetInputShape(i).data(), GetInputNumDims(i), devInputs_[i]); - if (inputTensor == nullptr) { - ERROR_LOG("Create Tensor for input[%zu] failed", i); - return false; - } - inputTensor_.emplace_back(inputTensor); - } - - for (size_t i = 0; i < numOutputs_; ++i) { - auto size = GetOutputSize(i); - void *devMem = nullptr; - if (aclrtMalloc(&devMem, size, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) { - ERROR_LOG("Malloc device memory for output[%zu] failed", i); - return false; - } - devOutputs_.emplace_back(devMem); - outputBuffers_.emplace_back(aclCreateDataBuffer(devMem, size)); - - void *hostOutput = nullptr; - if (g_isDevice) { - if (aclrtMalloc(&hostOutput, size, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) { - ERROR_LOG("Malloc device memory for output[%zu] failed", i); - return false; - } - } else { - if (aclrtMallocHost(&hostOutput, size) != ACL_SUCCESS) { - ERROR_LOG("Malloc device memory for output[%zu] failed", i); - return false; - } - } - if (hostOutput == nullptr) { - ERROR_LOG("Malloc host memory for output[%zu] failed", i); - return false; - } - hostOutputs_.emplace_back(hostOutput); - - aclTensor *outputTensor = - aclCreateTensor(GetOutputShape(i).data(), GetOutputNumDims(i), GetOutputDataType(i), nullptr, 0, - GetOutputFormat(i), GetOutputShape(i).data(), GetOutputNumDims(i), devOutputs_[i]); - if (outputTensor == nullptr) { - ERROR_LOG("Create Tensor for output[%zu] failed", i); - return false; - } - 
outputTensor_.emplace_back(outputTensor); - } - - return true; -} - -const size_t OpRunner::NumInputs() -{ - return numInputs_; -} - -const size_t OpRunner::NumOutputs() -{ - return numOutputs_; -} - -const size_t OpRunner::GetInputSize(size_t index) const -{ - if (index >= numInputs_) { - ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); - return 0; - } - - return aclGetTensorDescSize(opDesc_->inputDesc[index]); -} - -const size_t OpRunner::GetInputNumDims(size_t index) const -{ - if (index >= numInputs_) { - ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); - return 0; - } - - return aclGetTensorDescNumDims(opDesc_->inputDesc[index]); -} - -aclDataType OpRunner::GetInputDataType(size_t index) const -{ - if (index >= numInputs_) { - ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); - return ACL_DT_UNDEFINED; - } - - return aclGetTensorDescType(opDesc_->inputDesc[index]); -} - -aclFormat OpRunner::GetInputFormat(size_t index) const -{ - if (index >= numInputs_) { - ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); - return ACL_FORMAT_UNDEFINED; - } - - return aclGetTensorDescFormat(opDesc_->inputDesc[index]); -} - -std::vector OpRunner::GetInputShape(size_t index) const -{ - std::vector ret; - if (index >= numInputs_) { - ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); - return ret; - } - - auto desc = opDesc_->inputDesc[index]; - for (size_t i = 0; i < aclGetTensorDescNumDims(desc); ++i) { - int64_t dimSize; - if (aclGetTensorDescDimV2(desc, i, &dimSize) != ACL_SUCCESS) { - ERROR_LOG("get dims from tensor desc failed. dims index = %zu", i); - ret.clear(); - return ret; - } - ret.emplace_back(dimSize); - } - - return ret; -} - -size_t OpRunner::GetOutputSize(size_t index) const -{ - if (index >= numOutputs_) { - ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); - return 0; - } - - return aclGetTensorDescSize(opDesc_->outputDesc[index]); -} - -const size_t OpRunner::GetOutputNumDims(size_t index) const -{ - if (index >= numOutputs_) { - ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); - return 0; - } - - return aclGetTensorDescNumDims(opDesc_->outputDesc[index]); -} - -aclDataType OpRunner::GetOutputDataType(size_t index) const -{ - if (index >= numOutputs_) { - ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); - return ACL_DT_UNDEFINED; - } - - return aclGetTensorDescType(opDesc_->outputDesc[index]); -} - -aclFormat OpRunner::GetOutputFormat(size_t index) const -{ - if (index >= numOutputs_) { - ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); - return ACL_FORMAT_UNDEFINED; - } - - return aclGetTensorDescFormat(opDesc_->outputDesc[index]); -} - -std::vector OpRunner::GetOutputShape(size_t index) const -{ - std::vector ret; - if (index >= numOutputs_) { - ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); - return ret; - } - - auto desc = opDesc_->outputDesc[index]; - for (size_t i = 0; i < aclGetTensorDescNumDims(desc); ++i) { - int64_t dimSize; - if (aclGetTensorDescDimV2(desc, i, &dimSize) != ACL_SUCCESS) { - ERROR_LOG("get dims from tensor desc failed. 
dims index = %zu", i); - ret.clear(); - return ret; - } - ret.emplace_back(dimSize); - } - return ret; -} - -size_t OpRunner::GetInputElementCount(size_t index) const -{ - if (index >= opDesc_->inputDesc.size()) { - ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); - return 0; - } - - return aclGetTensorDescElementCount(opDesc_->inputDesc[index]); -} - -size_t OpRunner::GetOutputElementCount(size_t index) const -{ - if (index >= opDesc_->outputDesc.size()) { - ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); - return 0; - } - - return aclGetTensorDescElementCount(opDesc_->outputDesc[index]); -} - -bool OpRunner::RunOp() -{ - for (size_t i = 0; i < numInputs_; ++i) { - auto size = GetInputSize(i); - aclrtMemcpyKind kind = ACL_MEMCPY_HOST_TO_DEVICE; - if (g_isDevice) { - kind = ACL_MEMCPY_DEVICE_TO_DEVICE; - } - if (aclrtMemcpy(devInputs_[i], size, hostInputs_[i], size, kind) != ACL_SUCCESS) { - ERROR_LOG("Copy input[%zu] failed", i); - return false; - } - INFO_LOG("Copy input[%zu] success", i); - } - - aclrtStream stream = nullptr; - if (aclrtCreateStream(&stream) != ACL_SUCCESS) { - ERROR_LOG("Create stream failed"); - return false; - } - INFO_LOG("Create stream success"); - - size_t workspaceSize = 0; - aclOpExecutor *handle = nullptr; - auto ret = - aclnnScatterCustomGetWorkspaceSize(inputTensor_[0], inputTensor_[1], outputTensor_[0], &workspaceSize, &handle); - if (ret != ACL_SUCCESS) { - (void)aclrtDestroyStream(stream); - ERROR_LOG("Get Operator Workspace failed. error code is %d", static_cast(ret)); - return false; - } - INFO_LOG("Execute aclnnScatterCustomGetWorkspaceSize success, workspace size %lu", workspaceSize); - - if (workspaceSize != 0) { - if (aclrtMalloc(&workspace_, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) { - ERROR_LOG("Malloc device memory failed"); - } - } - - ret = aclnnScatterCustom(workspace_, workspaceSize, handle, stream); - if (ret != ACL_SUCCESS) { - (void)aclrtDestroyStream(stream); - ERROR_LOG("Execute Operator failed. error code is %d", static_cast(ret)); - return false; - } - INFO_LOG("Execute aclnnScatterCustom success"); - - // The unit of 5000 is ms. - ret = aclrtSynchronizeStreamWithTimeout(stream, 5000); - if (ret != SUCCESS) { - ERROR_LOG("Synchronize stream failed. 
error code is %d", static_cast(ret)); - (void)aclrtDestroyStream(stream); - return false; - } - INFO_LOG("Synchronize stream success"); - - for (size_t i = 0; i < numOutputs_; ++i) { - auto size = GetOutputSize(i); - aclrtMemcpyKind kind = ACL_MEMCPY_DEVICE_TO_HOST; - if (g_isDevice) { - kind = ACL_MEMCPY_DEVICE_TO_DEVICE; - } - if (aclrtMemcpy(hostOutputs_[i], size, devOutputs_[i], size, kind) != ACL_SUCCESS) { - INFO_LOG("Copy output[%zu] success", i); - (void)aclrtDestroyStream(stream); - return false; - } - INFO_LOG("Copy output[%zu] success", i); - } - - (void)aclrtDestroyStream(stream); - return true; -} - -template void DoPrintData(const T *data, size_t count, size_t elementsPerRow) -{ - assert(elementsPerRow != 0); - for (size_t i = 0; i < count; ++i) { - std::cout << std::setw(10) << data[i]; - if (i % elementsPerRow == elementsPerRow - 1) { - std::cout << std::endl; - } - } -} - -void DoPrintFp16Data(const aclFloat16 *data, size_t count, size_t elementsPerRow) -{ - assert(elementsPerRow != 0); - for (size_t i = 0; i < count; ++i) { - std::cout << std::setw(10) << std::setprecision(4) << aclFloat16ToFloat(data[i]); - if (i % elementsPerRow == elementsPerRow - 1) { - std::cout << std::endl; - } - } -} - -void PrintData(const void *data, size_t count, aclDataType dataType, size_t elementsPerRow) -{ - if (data == nullptr) { - ERROR_LOG("Print data failed. data is nullptr"); - return; - } - - switch (dataType) { - case ACL_BOOL: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_INT8: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_UINT8: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_INT16: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_UINT16: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_INT32: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_UINT32: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_INT64: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_UINT64: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_FLOAT16: - DoPrintFp16Data(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_FLOAT: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_DOUBLE: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - default: - ERROR_LOG("Unsupported type: %d", dataType); - } -} - -void OpRunner::PrintInput(size_t index, size_t numElementsPerRow) -{ - if (index >= numInputs_) { - ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numInputs_); - return; - } - - auto desc = opDesc_->inputDesc[index]; - PrintData(hostInputs_[index], GetInputElementCount(index), aclGetTensorDescType(desc), numElementsPerRow); -} - -void OpRunner::PrintOutput(size_t index, size_t numElementsPerRow) -{ - if (index >= numOutputs_) { - ERROR_LOG("index out of range. 
index = %zu, numOutputs = %zu", index, numOutputs_); - return; - } - - auto desc = opDesc_->outputDesc[index]; - PrintData(hostOutputs_[index], GetOutputElementCount(index), aclGetTensorDescType(desc), numElementsPerRow); -} diff --git a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/src/operator_desc.cpp b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/src/operator_desc.cpp deleted file mode 100644 index da04cf6c9..000000000 --- a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/src/operator_desc.cpp +++ /dev/null @@ -1,51 +0,0 @@ -/** - * @file operator_desc.cpp - * - * Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - */ -#include "operator_desc.h" - -#include "common.h" - -using namespace std; - -OperatorDesc::OperatorDesc() {} - -OperatorDesc::~OperatorDesc() -{ - for (auto *desc : inputDesc) { - aclDestroyTensorDesc(desc); - } - - for (auto *desc : outputDesc) { - aclDestroyTensorDesc(desc); - } -} - -OperatorDesc &OperatorDesc::AddInputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, aclFormat format) -{ - aclTensorDesc *desc = aclCreateTensorDesc(dataType, numDims, dims, format); - if (desc == nullptr) { - ERROR_LOG("create tensor failed"); - return *this; - } - inputDesc.emplace_back(desc); - return *this; -} - -OperatorDesc &OperatorDesc::AddOutputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, - aclFormat format) -{ - aclTensorDesc *desc = aclCreateTensorDesc(dataType, numDims, dims, format); - if (desc == nullptr) { - ERROR_LOG("create tensor failed"); - return *this; - } - - outputDesc.emplace_back(desc); - return *this; -} diff --git a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/README.md b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/README.md deleted file mode 100644 index 5438be0a9..000000000 --- a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/README.md +++ /dev/null @@ -1,109 +0,0 @@ -## 概述 -本样例基于ScatterCustom算子工程,介绍了msOpGen工具生成简易自定义算子工程和单算子调用。 - -## 目录结构介绍 -``` -├── 28_Scatter_frameworklaunch // 使用框架调用的方式调用Scatter算子 -│ ├── AclNNInvocationNaive // 通过aclnn调用的方式调用ScatterCustom算子, 简化了编译脚本 -│ ├── ScatterCustom // ScatterCustom算子工程 -│ ├── ScatterCustom.json // ScatterCustom算子的原型定义json文件 -│ └── install.sh // 脚本,调用msOpGen生成简易自定义算子工程,并编译 -``` - -## 算子描述 -Scatter算子实现了根据偏移地址生成新的结果张量后将输入张量分散到结果张量 - -## 算子规格描述 - - - - - - - - - - - -
| 算子类型(OpType) | Scatter | | | |
| ---------------- | ------- | --- | --- | --- |
| 算子输入 | name | shape | data type | format |
| | x | 8 * 2048 | float | ND |
| | y | 8 * 2048 | float | ND |
| 算子输出 | z | 8 * 2048 | float | ND |
| 核函数名 | Scatter_custom | | | |
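
下面是一段仅作示意的NumPy参考实现,用于帮助理解上表中Scatter的计算语义(src、dst_offset、dst分别对应表中的x、y、z)。其中“y保存的是目的张量的字节偏移,需按元素字节数换算为下标”是依据本样例gen_data.py构造的数据做出的假设,并非算子的正式定义:

```python
import numpy as np

def scatter_reference(src, dst_offset, dst):
    """示意性的Scatter参考实现:按偏移把src逐元素分散写入dst(假设偏移为字节偏移)。"""
    elem_size = src.dtype.itemsize                 # 每个元素的字节数,float16为2
    for i in range(src.shape[0]):
        dst[dst_offset[i] // elem_size] = src[i]   # 字节偏移换算成元素下标后写入
    return dst

# 与本样例gen_data.py一致的构造数据,期望结果为src的逆序
src = np.arange(128, dtype=np.float16)
dst_offset = np.arange(254, -1, -2, dtype=np.uint32)
dst = np.zeros(128, dtype=np.float16)
print(scatter_reference(src, dst_offset, dst))     # 输出 [127. 126. ... 1. 0.]
```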
- -## 支持的产品型号 -本样例支持如下产品型号: -- Atlas 训练系列产品 -- Atlas 推理系列产品AI Core -- Atlas A2训练系列产品/Atlas A2推理系列产品 -- Atlas 200/500 A2推理产品 - -## 算子工程介绍 -其中,算子工程目录ScatterCustom包含算子的实现文件,如下所示: -``` -├── ScatterCustom // Scatter自定义算子工程 -│ ├── op_host // host侧实现文件 -│ └── op_kernel // kernel侧实现文件 -``` -CANN软件包中提供了工程创建工具msOpGen,ScatterCustom算子工程可通过ScatterCustom.json自动创建,简易自定义算子工程具体请参考[Ascend C算子开发](https://hiascend.com/document/redirect/CannCommunityOpdevAscendC)>附录>简易自定义算子工程 章节。 - -创建完简易自定义算子工程后,开发者重点需要完成算子工程目录CustomOp下host和kernel的功能开发。为简化样例运行流程,本样例已在ScatterCustom目录准备好了必要的算子实现,install.sh脚本会自动将实现复制到CustomOp对应目录下,再编译算子。 - -## 编译运行样例算子 -针对简易自定义算子工程,编译运行包含如下步骤: -- 调用msOpGen工具生成简易自定义算子工程; -- 完成算子host和kernel实现; -- 编译简易自定义算子工程; -- 调用执行自定义算子; - -详细操作如下所示。 -### 1. 获取源码包 -编译运行此样例前,请参考[准备:获取样例代码](../README.md#codeready)获取源码包。 - -### 2. 生成简易自定义算子工程,复制host和kernel实现并编译算子 - - 切换到msOpGen脚本install.sh所在目录 - ```bash - # 若开发者以git命令行方式clone了master分支代码,并切换目录 - cd ${git_clone_path}/samples/operator/ascendc/0_introduction/28_Scatter_frameworklaunch - ``` - - - 调用脚本,生成简易自定义算子工程,复制host和kernel实现并编译算子 - - 方式一:配置环境变量运行脚本 - 请根据当前环境上CANN开发套件包的[安装方式](https://hiascend.com/document/redirect/CannCommunityInstSoftware),选择对应配置环境变量命令。 - - 默认路径,root用户安装CANN软件包 - ```bash - export ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest - ``` - - 默认路径,非root用户安装CANN软件包 - ```bash - export ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest - ``` - - 指定路径install_path,安装CANN软件包 - ```bash - export ASCEND_INSTALL_PATH=${install_path}/ascend-toolkit/latest - ``` - 运行install.sh脚本 - ```bash - bash install.sh -v [SOC_VERSION] - ``` - - 方式二:指定命令行安装路径来运行脚本 - ```bash - bash install.sh -v [SOC_VERSION] -i [ASCEND_INSTALL_PATH] - ``` - 参数说明: - - SOC_VERSION:昇腾AI处理器型号,如果无法确定具体的[SOC_VERSION],则在安装昇腾AI处理器的服务器执行npu-smi info命令进行查询,在查询到的“Name”前增加Ascend信息,例如“Name”对应取值为xxxyy,实际配置的[SOC_VERSION]值为Ascendxxxyy。支持以下产品型号: - - Atlas 训练系列产品 - - Atlas 推理系列产品AI Core - - Atlas A2训练系列产品/Atlas A2推理系列产品 - - Atlas 200/500 A2推理产品 - - ASCEND_INSTALL_PATH:CANN软件包安装路径 - - 脚本运行成功后,会在当前目录下创建CustomOp目录,编译完成后,会在CustomOp/build_out/op_api/lib目录下生成自定义算子库文件libcust_opapi.so,在CustomOp/build_out/op_api/include目录下生成aclnn接口的头文件。 - - 备注:如果要使用dump调试功能,需要移除op_host内的Atlas 训练系列产品、Atlas 200/500 A2 推理产品的配置项。 - -### 3. 
调用执行算子工程 -- [aclnn调用ScatterCustom算子工程(代码简化)](./AclNNInvocationNaive/README.md) - -## 更新说明 -| 时间 | 更新事项 | -| ---------- | ---------------------------- | -| 2024/10/21 | 初始版本 | -| 2024/11/11 | 样例目录调整 | -| 2024/11/18 | README.md更新 | diff --git a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/ScatterCustom.json b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/ScatterCustom.json deleted file mode 100644 index 530e9779d..000000000 --- a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/ScatterCustom.json +++ /dev/null @@ -1,39 +0,0 @@ -[ - { - "op": "ScatterCustom", - "input_desc": [ - { - "name": "x", - "param_type": "required", - "format": [ - "ND" - ], - "type": [ - "float16" - ] - }, - { - "name": "y", - "param_type": "required", - "format": [ - "ND" - ], - "type": [ - "float16" - ] - } - ], - "output_desc": [ - { - "name": "z", - "param_type": "required", - "format": [ - "ND" - ], - "type": [ - "float16" - ] - } - ] - } -] \ No newline at end of file diff --git a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/ScatterCustom/op_host/scatter_custom.cpp b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/ScatterCustom/op_host/scatter_custom.cpp deleted file mode 100644 index d7eb2f964..000000000 --- a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/ScatterCustom/op_host/scatter_custom.cpp +++ /dev/null @@ -1,69 +0,0 @@ -/** - * @file scatter_custom.cpp - * - * Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - */ -#include "scatter_custom_tiling.h" -#include "register/op_def_registry.h" - -namespace optiling { -const uint32_t BLOCK_DIM = 1; -static ge::graphStatus TilingFunc(gert::TilingContext *context) -{ - context->SetBlockDim(BLOCK_DIM); - size_t *currentWorkspace = context->GetWorkspaceSizes(1); - currentWorkspace[0] = 0; - return ge::GRAPH_SUCCESS; -} -} // namespace optiling - -namespace ge { -static graphStatus InferShape(gert::InferShapeContext *context) -{ - const gert::Shape *x1_shape = context->GetInputShape(0); - gert::Shape *y_shape = context->GetOutputShape(0); - *y_shape = *x1_shape; - return GRAPH_SUCCESS; -} - -static graphStatus InferDataType(gert::InferDataTypeContext *context) -{ - const auto inputDataType = context->GetInputDataType(0); - context->SetOutputDataType(0, inputDataType); - return ge::GRAPH_SUCCESS; -} -} // namespace ge - -namespace ops { -class ScatterCustom : public OpDef { -public: - explicit ScatterCustom(const char *name) : OpDef(name) - { - this->Input("x") - .ParamType(REQUIRED) - .DataType({ge::DT_FLOAT16}) - .Format({ge::FORMAT_ND}); - this->Input("y") - .ParamType(REQUIRED) - .DataType({ge::DT_UINT32}) - .Format({ge::FORMAT_ND}); - this->Output("z") - .ParamType(REQUIRED) - .DataType({ge::DT_FLOAT16}) - .Format({ge::FORMAT_ND}); - - this->SetInferShape(ge::InferShape).SetInferDataType(ge::InferDataType); - this->AICore() - .SetTiling(optiling::TilingFunc) - .AddConfig("ascend910") - .AddConfig("ascend310p") - .AddConfig("ascend310b") - .AddConfig("ascend910b"); - } -}; -OP_ADD(ScatterCustom); -} // namespace ops diff --git a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/ScatterCustom/op_host/scatter_custom_tiling.h b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/ScatterCustom/op_host/scatter_custom_tiling.h deleted file mode 100644 index 
38a50322c..000000000 --- a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/ScatterCustom/op_host/scatter_custom_tiling.h +++ /dev/null @@ -1,22 +0,0 @@ -/** - * @file Scatter_custom_tiling.h - * - * Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - */ -#ifndef SCATTER_CUSTOM_TILING_H -#define SCATTER_CUSTOM_TILING_H -#include "register/tilingdata_base.h" - -namespace optiling { -BEGIN_TILING_DATA_DEF(TilingData) -TILING_DATA_FIELD_DEF(uint32_t, totalLength); -TILING_DATA_FIELD_DEF(uint32_t, tileNum); -END_TILING_DATA_DEF; - -REGISTER_TILING_DATA_CLASS(ScatterCustom, TilingData) -} // namespace optiling -#endif // ADD_CUSTOM_TILING_H diff --git a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/install.sh b/operator/ascendc/0_introduction/28_scatter_frameworklaunch/install.sh deleted file mode 100644 index bf2ba6ec4..000000000 --- a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/install.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!/bin/bash -SHORT=v:,i:, -LONG=soc-version:,install-path:, -OPTS=$(getopt -a --options $SHORT --longoptions $LONG -- "$@") -eval set -- "$OPTS" - -while :; do - case "$1" in - -v | --soc-version) - SOC_VERSION="$2" - shift 2 - ;; - -i | --install-path) - ASCEND_INSTALL_PATH="$2" - shift 2 - ;; - --) - shift - break - ;; - *) - echo "[ERROR]: Unexpected option: $1" - break - ;; - esac -done - -VERSION_LIST="Ascend910A Ascend910B Ascend310B1 Ascend310B2 Ascend310B3 Ascend310B4 Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" -if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then - echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" - exit -1 -fi - -if [ -n "$ASCEND_INSTALL_PATH" ]; then - _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH -elif [ -n "$ASCEND_HOME_PATH" ]; then - _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH -else - if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then - _ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest - else - _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest - fi -fi -source $_ASCEND_INSTALL_PATH/bin/setenv.bash -export ASCEND_HOME_PATH=$_ASCEND_INSTALL_PATH - -rm -rf CustomOp -# Generate the op framework -msopgen gen -i ScatterCustom.json -f aclnn -c ai_core-${SOC_VERSION} -lan cpp -out CustomOp -# Copy ScatterCustom op implementation files to CustomOp -cp -rf ScatterCustom/* CustomOp -# build CustomOp, compile ScatterCustom op -(cd CustomOp && bash build.sh) \ No newline at end of file diff --git a/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/CMakeLists.txt b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/CMakeLists.txt new file mode 100644 index 000000000..c8026efb8 --- /dev/null +++ b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/CMakeLists.txt @@ -0,0 +1,44 @@ +cmake_minimum_required(VERSION 3.16) +project(Ascend_c) + +set(RUN_MODE "npu" CACHE STRING "cpu/sim/npu") +set(SOC_VERSION "Ascend310P3" CACHE STRING "system on chip type") +set(ASCEND_CANN_PACKAGE_PATH "/usr/local/Ascend/ascend-toolkit/latest" + CACHE STRING "ASCEND CANN package installation directory" +) +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "Build type Release/Debug (default Debug)" FORCE) +endif() +if(CMAKE_INSTALL_PREFIX STREQUAL /usr/local) + 
set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/out" CACHE STRING "path for install()" FORCE) +endif() + +file(GLOB KERNEL_FILES ${CMAKE_CURRENT_SOURCE_DIR}/scatter_custom.cpp) + +if("${RUN_MODE}" STREQUAL "cpu") + include(cmake/cpu_lib.cmake) +elseif("${RUN_MODE}" STREQUAL "sim" OR "${RUN_MODE}" STREQUAL "npu") + include(cmake/npu_lib.cmake) +else() + message("invalid RUN_MODE: ${RUN_MODE}") +endif() +add_executable(ascendc_kernels_bbit ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp) + +target_compile_options(ascendc_kernels_bbit PRIVATE + $:-g>> + -O2 -std=c++17 -D_GLIBCXX_USE_CXX11_ABI=0 -Wall -Werror +) + +target_link_libraries(ascendc_kernels_bbit PRIVATE + $:host_intf_pub>> + $:tikicpulib::${SOC_VERSION}>> + ascendcl + $:c_sec>> + $:ascendc_kernels>> +) + +install(TARGETS ascendc_kernels_bbit + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} +) diff --git a/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/README.md b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/README.md new file mode 100644 index 000000000..02e7bc9bf --- /dev/null +++ b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/README.md @@ -0,0 +1,72 @@ +## 概述 +本样例介绍兼容Scatter算子实现及核函数直调方法。 + +## 目录结构介绍 +``` +├── ScatterKernelInvocationAcl +│ ├── cmake // 编译工程文件 +│ ├── scripts +│ │ ├── gen_data.py // 输入数据和真值数据生成脚本 +│ │ └── verify_result.py // 验证输出数据和真值数据是否一致的验证脚本 +│ ├── scatter_custom.cpp // 算子kernel实现 +│ ├── CMakeLists.txt // 编译工程文件 +│ ├── data_utils.h // 数据读入写出函数 +│ ├── main.cpp // 主函数,调用算子的应用程序,含CPU域及NPU域调用 +│ └── run.sh // 编译运行算子的脚本 +``` +## 代码实现介绍 +本调用样例中实现的是1971对于Scatter功能变换的兼容样例。 + + Scatter计算逻辑是:给定一个连续的输入张量和一个目的地址偏移张量,Scatter指令根据偏移地址生成新的结果张量后将输入张量分散到结果张量中。 + 兼容Scatter算子逻辑是:对于部分有规律的离散计算,可以通过Loop循环搬出的方式来提升效率,对于完全离散的场景。只能通过标量搬出的方式进行处理。 + + Scatter兼容样例的实现流程分为3个基本任务:CopyIn,Compute,CopyOut。CopyIn任务负责将Global Memory上的输入Tensor srcGm和dstGm搬运到Local Memory,分别存储在xLocal、yLocal,Compute任务负责对xLocal、yLocal进行标量计算,计算结果存储在zLocal中,CopyOut任务负责将输出数据从zLocal搬运至Global Memory上的输出Tensor dstGm中。具体请参考[scatter_custom.cpp](./scatter_custom.cpp)。 + +- 调用实现 + 1. CPU侧运行验证主要通过ICPU_RUN_KF CPU调测宏等CPU调测库提供的接口来完成; + 2. 
NPU侧运行验证主要通过使用aclrtLaunchKernelWithConfig函数调用来完成。 + + 应用程序通过ASCENDC_CPU_DEBUG 宏区分代码逻辑运行于CPU侧还是NPU侧。 + +## 运行样例算子 + - 打开样例目录 + 以命令行方式下载样例代码,master分支为例。 + ```bash + cd ${git_clone_path}/samples/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl + ``` + - 配置环境变量 + + 请根据当前环境上CANN开发套件包的[安装方式](https://hiascend.com/document/redirect/CannCommunityInstSoftware),选择对应配置环境变量的命令。 + - 默认路径,root用户安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + ``` + - 默认路径,非root用户安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + ``` + - 指定路径install_path,安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=${install_path}/ascend-toolkit/latest + ``` + + - 样例执行 + + ```bash + bash run.sh -r [RUN_MODE] -v [SOC_VERSION] + ``` + - RUN_MODE:编译方式,可选择CPU调试,NPU上板。支持参数为[cpu / npu] + - SOC_VERSION:昇腾AI处理器型号,如果无法确定具体的[SOC_VERSION],则在安装昇腾AI处理器的服务器执行npu-smi info命令进行查询,在查询到的“Name”前增加Ascend信息,例如“Name”对应取值为xxxyy,实际配置的[SOC_VERSION]值为Ascendxxxyy。支持以下产品型号: + - Atlas 训练系列产品 + - Atlas 推理系列产品AI Core + - Atlas A2训练系列产品/Atlas 800I A2推理产品 + - Atlas 200/500 A2推理产品 + + 示例如下,Ascendxxxyy请替换为实际的AI处理器型号。 + ```bash + bash run.sh -r cpu -v Ascendxxxyy + ``` +## 更新说明 +| 时间 | 更新事项 | +| ---------- | ------------ | +| 2025/06/05 | 新增本readme | \ No newline at end of file diff --git a/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/cmake/cpu_lib.cmake b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/cmake/cpu_lib.cmake new file mode 100644 index 000000000..751a11941 --- /dev/null +++ b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/cmake/cpu_lib.cmake @@ -0,0 +1,9 @@ +if(NOT DEFINED ENV{CMAKE_PREFIX_PATH}) + set(CMAKE_PREFIX_PATH ${ASCEND_CANN_PACKAGE_PATH}/tools/tikicpulib/lib/cmake) +endif() +find_package(tikicpulib REQUIRED) + +add_library(ascendc_kernels SHARED ${KERNEL_FILES}) +target_link_libraries(ascendc_kernels PUBLIC tikicpulib::${SOC_VERSION}) +target_compile_options(ascendc_kernels PRIVATE -g -O0 -std=c++17) +install(TARGETS ascendc_kernels DESTINATION ${CMAKE_INSTALL_LIBDIR}) diff --git a/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/cmake/npu_lib.cmake b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/cmake/npu_lib.cmake new file mode 100644 index 000000000..d862f0064 --- /dev/null +++ b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/cmake/npu_lib.cmake @@ -0,0 +1,10 @@ +if(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake) + set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake) +elseif(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/tools/tikcpp/ascendc_kernel_cmake) + set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/tools/tikcpp/ascendc_kernel_cmake) +else() + message(FATAL_ERROR "ascendc_kernel_cmake does not exist ,please check whether the cann package is installed") +endif() +include(${ASCENDC_CMAKE_DIR}/ascendc.cmake) + +ascendc_fatbin_library(ascendc_kernels ${KERNEL_FILES}) diff --git a/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/data_utils.h b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/data_utils.h new file mode 100644 index 000000000..82c2c743c --- /dev/null +++ b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/data_utils.h @@ -0,0 +1,242 @@ +/** + * @file 
data_utils.h + * +* Copyright (c) 2025 Huawei Technologies Co., Ltd. +* This program is free software, you can redistribute it and/or modify it under the terms and conditions of +* CANN Open Software License Agreement Version 2.0 (the "License"). +* Please refer to the License for details. You may not use this file except in compliance with the License. +* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, +* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. +* See LICENSE in the root of the software repository for the full text of the License. +*/ +#ifndef DATA_UTILS_H +#define DATA_UTILS_H +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "acl/acl.h" + +typedef enum { + DT_UNDEFINED = -1, + FLOAT = 0, + HALF = 1, + INT8_T = 2, + INT32_T = 3, + UINT8_T = 4, + INT16_T = 6, + UINT16_T = 7, + UINT32_T = 8, + INT64_T = 9, + UINT64_T = 10, + DOUBLE = 11, + BOOL = 12, + STRING = 13, + COMPLEX64 = 16, + COMPLEX128 = 17, + BF16 = 27 +} printDataType; + +#define INFO_LOG(fmt, args...) fprintf(stdout, "[INFO] " fmt "\n", ##args) +#define WARN_LOG(fmt, args...) fprintf(stdout, "[WARN] " fmt "\n", ##args) +#define ERROR_LOG(fmt, args...) fprintf(stdout, "[ERROR] " fmt "\n", ##args) +#define CHECK_ACL(x) \ + do { \ + aclError __ret = x; \ + if (__ret != ACL_ERROR_NONE) { \ + std::cerr << __FILE__ << ":" << __LINE__ << " aclError:" << __ret << std::endl; \ + } \ + } while (0); + +/** + * @brief Read data from file + * @param [in] filePath: file path + * @param [out] fileSize: file size + * @return read result + */ +bool ReadFile(const std::string &filePath, size_t &fileSize, void *buffer, size_t bufferSize) +{ + struct stat sBuf; + int fileStatus = stat(filePath.data(), &sBuf); + if (fileStatus == -1) { + ERROR_LOG("failed to get file"); + return false; + } + if (S_ISREG(sBuf.st_mode) == 0) { + ERROR_LOG("%s is not a file, please enter a file", filePath.c_str()); + return false; + } + + std::ifstream file; + file.open(filePath, std::ios::binary); + if (!file.is_open()) { + ERROR_LOG("Open file failed. path = %s", filePath.c_str()); + return false; + } + + std::filebuf *buf = file.rdbuf(); + size_t size = buf->pubseekoff(0, std::ios::end, std::ios::in); + if (size == 0) { + ERROR_LOG("file size is 0"); + file.close(); + return false; + } + if (size > bufferSize) { + ERROR_LOG("file size is larger than buffer size"); + file.close(); + return false; + } + buf->pubseekpos(0, std::ios::in); + buf->sgetn(static_cast(buffer), size); + fileSize = size; + file.close(); + return true; +} + +/** + * @brief Write data to file + * @param [in] filePath: file path + * @param [in] buffer: data to write to file + * @param [in] size: size to write + * @return write result + */ +bool WriteFile(const std::string &filePath, const void *buffer, size_t size) +{ + if (buffer == nullptr) { + ERROR_LOG("Write file failed. buffer is nullptr"); + return false; + } + + int fd = open(filePath.c_str(), O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWRITE); + if (fd < 0) { + ERROR_LOG("Open file failed. path = %s", filePath.c_str()); + return false; + } + + size_t writeSize = write(fd, buffer, size); + (void)close(fd); + if (writeSize != size) { + ERROR_LOG("Write file Failed."); + return false; + } + + return true; +} + +/** + * @brief Reads a binary file into memory. 
+ * + * This function opens a binary file, reads its contents into a dynamically allocated memory buffer, + * and returns a pointer to the buffer and the size of the file through output parameters. + * + * @param filePath The path to the binary file to be read. + * @param outBuffer A reference to a unique pointer that will hold the file data. + * @param outSize A reference to a size_t that will hold the size of the file. + * @return true if the file was read successfully, false otherwise. + */ +bool ReadBinaryFile(const char *filePath, std::unique_ptr &outBuffer, size_t &outSize) +{ + FILE *file = fopen(filePath, "rb"); + if (!file) { + ERROR_LOG("Error opening file: %s\n", strerror(errno)); + return false; + } + + fseek(file, 0, SEEK_END); + outSize = ftell(file); + rewind(file); + + outBuffer.reset(new char[outSize]); + if (fread(outBuffer.get(), 1, outSize, file) != outSize) { + ERROR_LOG("Error reading file.\n"); + fclose(file); + return false; + } + + fclose(file); + return true; +} + +template void DoPrintData(const T *data, size_t count, size_t elementsPerRow) +{ + assert(elementsPerRow != 0); + for (size_t i = 0; i < count; ++i) { + std::cout << std::setw(10) << data[i]; + if (i % elementsPerRow == elementsPerRow - 1) { + std::cout << std::endl; + } + } +} + +void DoPrintHalfData(const aclFloat16 *data, size_t count, size_t elementsPerRow) +{ + assert(elementsPerRow != 0); + for (size_t i = 0; i < count; ++i) { + std::cout << std::setw(10) << std::setprecision(6) << aclFloat16ToFloat(data[i]); + if (i % elementsPerRow == elementsPerRow - 1) { + std::cout << std::endl; + } + } +} + +void PrintData(const void *data, size_t count, printDataType dataType, size_t elementsPerRow = 16) +{ + if (data == nullptr) { + ERROR_LOG("Print data failed. data is nullptr"); + return; + } + + switch (dataType) { + case BOOL: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT8_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT8_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT16_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT16_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT32_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT32_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT64_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT64_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case HALF: + DoPrintHalfData(reinterpret_cast(data), count, elementsPerRow); + break; + case FLOAT: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case DOUBLE: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + default: + ERROR_LOG("Unsupported type: %d", dataType); + } + std::cout << std::endl; +} +#endif // DATA_UTILS_H diff --git a/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/main.cpp b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/main.cpp new file mode 100644 index 000000000..c9aaaba9d --- /dev/null +++ b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/main.cpp @@ -0,0 +1,100 @@ +/** + * @file main.cpp + * +* Copyright (c) 2025 Huawei Technologies Co., Ltd. 
+* This program is free software, you can redistribute it and/or modify it under the terms and conditions of +* CANN Open Software License Agreement Version 2.0 (the "License"). +* Please refer to the License for details. You may not use this file except in compliance with the License. +* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, +* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. +* See LICENSE in the root of the software repository for the full text of the License. +*/ +#include "data_utils.h" +#ifndef ASCENDC_CPU_DEBUG +#include "acl/acl.h" +#else +#include "tikicpulib.h" +extern "C" __global__ __aicore__ void scatter_custom(GM_ADDR srcGm, GM_ADDR dstOffsetGm, GM_ADDR dstGm); +#endif + +int32_t main(int32_t argc, char *argv[]) +{ + uint32_t blockDim = 1; + size_t inputByteSize = 128 * sizeof(uint16_t); + size_t inputByteSize1 = 128 * sizeof(uint32_t); + size_t outputByteSize = 128 * sizeof(uint16_t); + +#ifdef ASCENDC_CPU_DEBUG + uint8_t *x = (uint8_t *)AscendC::GmAlloc(inputByteSize); + uint8_t *y = (uint8_t *)AscendC::GmAlloc(inputByteSize1); + uint8_t *z = (uint8_t *)AscendC::GmAlloc(outputByteSize); + + ReadFile("./input/input_x.bin", inputByteSize, x, inputByteSize); + ReadFile("./input/input_y.bin", inputByteSize1, y, inputByteSize1); + + AscendC::SetKernelMode(KernelMode::AIV_MODE); + ICPU_RUN_KF(scatter_custom, blockDim, x, y, z); // use this macro for cpu debug + + WriteFile("./output/output_z.bin", z, outputByteSize); + + AscendC::GmFree((void *)x); + AscendC::GmFree((void *)y); + AscendC::GmFree((void *)z); +#else + CHECK_ACL(aclInit(nullptr)); + int32_t deviceId = 0; + CHECK_ACL(aclrtSetDevice(deviceId)); + aclrtStream stream = nullptr; + CHECK_ACL(aclrtCreateStream(&stream)); + + uint8_t *xHost, *yHost, *zHost; + uint8_t *xDevice, *yDevice, *zDevice; + + CHECK_ACL(aclrtMallocHost((void **)(&xHost), inputByteSize)); + CHECK_ACL(aclrtMallocHost((void **)(&yHost), inputByteSize1)); + CHECK_ACL(aclrtMallocHost((void **)(&zHost), outputByteSize)); + CHECK_ACL(aclrtMalloc((void **)&xDevice, inputByteSize, ACL_MEM_MALLOC_HUGE_FIRST)); + CHECK_ACL(aclrtMalloc((void **)&yDevice, inputByteSize1, ACL_MEM_MALLOC_HUGE_FIRST)); + CHECK_ACL(aclrtMalloc((void **)&zDevice, outputByteSize, ACL_MEM_MALLOC_HUGE_FIRST)); + + ReadFile("./input/input_x.bin", inputByteSize, xHost, inputByteSize); + ReadFile("./input/input_y.bin", inputByteSize1, yHost, inputByteSize1); + + CHECK_ACL(aclrtMemcpy(xDevice, inputByteSize, xHost, inputByteSize, ACL_MEMCPY_HOST_TO_DEVICE)); + CHECK_ACL(aclrtMemcpy(yDevice, inputByteSize1, yHost, inputByteSize1, ACL_MEMCPY_HOST_TO_DEVICE)); + + aclrtBinHandle binHandle = nullptr; + aclrtFuncHandle funcHandle = nullptr; + aclrtArgsHandle argsHandle = nullptr; + aclrtParamHandle paramHandle = nullptr; + + const char *filePath = "./out/fatbin/ascendc_kernels/ascendc_kernels.o"; + CHECK_ACL(aclrtBinaryLoadFromFile(filePath, nullptr, &binHandle)); + CHECK_ACL(aclrtBinaryGetFunction(binHandle, "scatter_custom", &funcHandle)); + CHECK_ACL(aclrtKernelArgsInit(funcHandle, &argsHandle)); + + CHECK_ACL(aclrtKernelArgsAppend(argsHandle, (void **)&xDevice, sizeof(uintptr_t), ¶mHandle)); + CHECK_ACL(aclrtKernelArgsAppend(argsHandle, (void **)&yDevice, sizeof(uintptr_t), ¶mHandle)); + CHECK_ACL(aclrtKernelArgsAppend(argsHandle, (void **)&zDevice, sizeof(uintptr_t), ¶mHandle)); + CHECK_ACL(aclrtKernelArgsFinalize(argsHandle)); + + 
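+    // The NPU path loads the kernel binary (./out/fatbin/ascendc_kernels/ascendc_kernels.o, built by
+    // ascendc_fatbin_library) with aclrtBinaryLoadFromFile, resolves the "scatter_custom" symbol via
+    // aclrtBinaryGetFunction, and appends the three GM addresses (src, dst offsets, dst) as kernel
+    // arguments; the finalized argument handle is then launched on the stream below.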
CHECK_ACL(aclrtLaunchKernelWithConfig(funcHandle, blockDim, stream, nullptr, argsHandle, nullptr)); + CHECK_ACL(aclrtSynchronizeStream(stream)); + + CHECK_ACL(aclrtMemcpy(zHost, outputByteSize, zDevice, outputByteSize, ACL_MEMCPY_DEVICE_TO_HOST)); + WriteFile("./output/output_z.bin", zHost, outputByteSize); + + CHECK_ACL(aclrtBinaryUnLoad(binHandle)); + CHECK_ACL(aclrtFree(xDevice)); + CHECK_ACL(aclrtFree(yDevice)); + CHECK_ACL(aclrtFree(zDevice)); + CHECK_ACL(aclrtFreeHost(xHost)); + CHECK_ACL(aclrtFreeHost(yHost)); + CHECK_ACL(aclrtFreeHost(zHost)); + + CHECK_ACL(aclrtDestroyStream(stream)); + CHECK_ACL(aclrtResetDevice(deviceId)); + CHECK_ACL(aclFinalize()); +#endif + return 0; +} diff --git a/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/run.sh b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/run.sh new file mode 100644 index 000000000..eeff1b86f --- /dev/null +++ b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/run.sh @@ -0,0 +1,122 @@ +#!/bin/bash +# ---------------------------------------------------------------------------------------------------------- +# Copyright (c) 2025 Huawei Technologies Co., Ltd. +# This program is free software, you can redistribute it and/or modify it under the terms and conditions of +# CANN Open Software License Agreement Version 2.0 (the "License"). +# Please refer to the License for details. You may not use this file except in compliance with the License. +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. +# See LICENSE in the root of the software repository for the full text of the License. +# ---------------------------------------------------------------------------------------------------------- +CURRENT_DIR=$( + cd $(dirname ${BASH_SOURCE:-$0}) + pwd +) +cd $CURRENT_DIR + +BUILD_TYPE="Debug" +INSTALL_PREFIX="${CURRENT_DIR}/out" + +SHORT=r:,v:,i:,b:,p:, +LONG=run-mode:,soc-version:,install-path:,build-type:,install-prefix:, +OPTS=$(getopt -a --options $SHORT --longoptions $LONG -- "$@") +eval set -- "$OPTS" + +while :; do + case "$1" in + -r | --run-mode) + RUN_MODE="$2" + shift 2 + ;; + -v | --soc-version) + SOC_VERSION="$2" + shift 2 + ;; + -i | --install-path) + ASCEND_INSTALL_PATH="$2" + shift 2 + ;; + -b | --build-type) + BUILD_TYPE="$2" + shift 2 + ;; + -p | --install-prefix) + INSTALL_PREFIX="$2" + shift 2 + ;; + --) + shift + break + ;; + *) + echo "[ERROR]: Unexpected option: $1" + break + ;; + esac +done + +RUN_MODE_LIST="cpu npu" +if [[ " $RUN_MODE_LIST " != *" $RUN_MODE "* ]]; then + echo "[ERROR]: RUN_MODE error, This sample only support specify cpu or npu!" 
+ exit -1 +fi + +VERSION_LIST="Ascend910A Ascend910B Ascend310B1 Ascend310B2 Ascend310B3 Ascend310B4 Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" +if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" + exit -1 +fi + +if [ -n "$ASCEND_INSTALL_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH +elif [ -n "$ASCEND_HOME_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH +else + if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then + _ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + else + _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + fi +fi + +export ASCEND_TOOLKIT_HOME=${_ASCEND_INSTALL_PATH} +export ASCEND_HOME_PATH=${_ASCEND_INSTALL_PATH} +echo "[INFO]: Current compile soc version is ${SOC_VERSION}" +source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash +if [ "${RUN_MODE}" = "cpu" ]; then + export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH +fi + +set -e +rm -rf build out +mkdir -p build +cmake -B build \ + -DRUN_MODE=${RUN_MODE} \ + -DSOC_VERSION=${SOC_VERSION} \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ + -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \ + -DASCEND_CANN_PACKAGE_PATH=${_ASCEND_INSTALL_PATH} +cmake --build build -j +cmake --install build + +rm -f ascendc_kernels_bbit +cp ./out/bin/ascendc_kernels_bbit ./ +rm -rf input output +mkdir -p input output +python3 scripts/gen_data.py +( + export LD_LIBRARY_PATH=$(pwd)/out/lib:$(pwd)/out/lib64:${_ASCEND_INSTALL_PATH}/lib64:$LD_LIBRARY_PATH + if [[ "$RUN_WITH_TOOLCHAIN" -eq 1 ]]; then + if [ "${RUN_MODE}" = "npu" ]; then + msprof op --application=./ascendc_kernels_bbit + elif [ "${RUN_MODE}" = "sim" ]; then + msprof op simulator --application=./ascendc_kernels_bbit + elif [ "${RUN_MODE}" = "cpu" ]; then + ./ascendc_kernels_bbit + fi + else + ./ascendc_kernels_bbit + fi +) +md5sum output/*.bin +python3 scripts/verify_result.py output/output_z.bin output/golden.bin diff --git a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/ScatterCustom/op_kernel/scatter_custom.cpp b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/scatter_custom.cpp similarity index 46% rename from operator/ascendc/0_introduction/28_scatter_frameworklaunch/ScatterCustom/op_kernel/scatter_custom.cpp rename to operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/scatter_custom.cpp index 80052b344..72c85d8af 100644 --- a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/ScatterCustom/op_kernel/scatter_custom.cpp +++ b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/scatter_custom.cpp @@ -1,28 +1,30 @@ /** - * @file scatter_custom.cpp + * @file add_custom.cpp * - * Copyright (C) 2022-2024. Huawei Technologies Co., Ltd. All rights reserved. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - */ +* Copyright (c) 2025 Huawei Technologies Co., Ltd. +* This program is free software, you can redistribute it and/or modify it under the terms and conditions of +* CANN Open Software License Agreement Version 2.0 (the "License"). +* Please refer to the License for details. You may not use this file except in compliance with the License. 
+* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, +* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. +* See LICENSE in the root of the software repository for the full text of the License. +*/ #include "kernel_operator.h" -constexpr int32_t BUFFER_NUM = 1; // tensor num for each queue -template -class ScatterTest { +constexpr int32_t BUFFER_NUM = 1; // tensor num for each queue + +class KernelScatter { public: - __aicore__ inline ScatterTest() {} + __aicore__ inline KernelScatter() {} __aicore__ inline void Init(GM_ADDR srcGm, GM_ADDR dstOffsetGm, GM_ADDR dstGm, uint32_t count) { mElementCount = count; - xGm.SetGlobalBuffer((__gm__ DTYPE_X *)srcGm, mElementCount); - yGm.SetGlobalBuffer((__gm__ DTYPE_Y *)dstOffsetGm, mElementCount); - zGm.SetGlobalBuffer((__gm__ DTYPE_Z *)dstGm, mElementCount); - pipe.InitBuffer(inQueueX, BUFFER_NUM, mElementCount * sizeof(DTYPE_X)); - pipe.InitBuffer(inQueueY, BUFFER_NUM, mElementCount * sizeof(DTYPE_Y)); - pipe.InitBuffer(outQueueZ, BUFFER_NUM, mElementCount * sizeof(DTYPE_Z)); + xGm.SetGlobalBuffer((__gm__ half *)srcGm, mElementCount); + yGm.SetGlobalBuffer((__gm__ uint32_t *)dstOffsetGm, mElementCount); + zGm.SetGlobalBuffer((__gm__ half *)dstGm, mElementCount); + pipe.InitBuffer(inQueueX, BUFFER_NUM, mElementCount * sizeof(half)); + pipe.InitBuffer(inQueueY, BUFFER_NUM, mElementCount * sizeof(uint32_t)); + pipe.InitBuffer(outQueueZ, BUFFER_NUM, mElementCount * sizeof(half)); } __aicore__ inline void Process() { @@ -34,8 +36,8 @@ public: private: __aicore__ inline void CopyIn() { - AscendC::LocalTensor xLocal = inQueueX.AllocTensor(); - AscendC::LocalTensor yLocal = inQueueY.AllocTensor(); + AscendC::LocalTensor xLocal = inQueueX.AllocTensor(); + AscendC::LocalTensor yLocal = inQueueY.AllocTensor(); AscendC::DataCopy(xLocal, xGm, mElementCount); AscendC::DataCopy(yLocal, yGm, mElementCount); inQueueX.EnQue(xLocal); @@ -43,21 +45,21 @@ private: } __aicore__ inline void Compute() { - AscendC::LocalTensor xLocal = inQueueX.DeQue(); - AscendC::LocalTensor yLocal = inQueueY.DeQue(); - AscendC::LocalTensor zLocal = outQueueZ.AllocTensor(); + AscendC::LocalTensor xLocal = inQueueX.DeQue(); + AscendC::LocalTensor yLocal = inQueueY.DeQue(); + AscendC::LocalTensor zLocal = outQueueZ.AllocTensor(); for (uint32_t i = 0; i < mElementCount; ++i) { auto offset = yLocal.GetValue(i) / sizeof(T); auto srcValue = xLocal.GetValue(i); zLocal.SetValue(offset, srcValue); } - outQueueZ.EnQue(zLocal); + outQueueZ.EnQue(zLocal); inQueueX.FreeTensor(xLocal); inQueueY.FreeTensor(yLocal); } __aicore__ inline void CopyOut() { - AscendC::LocalTensor zLocal = outQueueZ.DeQue(); + AscendC::LocalTensor zLocal = outQueueZ.DeQue(); AscendC::DataCopy(zGm, zLocal, mElementCount); outQueueZ.FreeTensor(zLocal); } @@ -66,27 +68,18 @@ private: AscendC::TPipe pipe; AscendC::TQue inQueueX, inQueueY; AscendC::TQue outQueueZ; - AscendC::GlobalTensor xGm; - AscendC::GlobalTensor yGm; - AscendC::GlobalTensor zGm; + AscendC::GlobalTensor xGm; + AscendC::GlobalTensor yGm; + AscendC::GlobalTensor zGm; uint32_t mElementCount; }; -#define KERNEL_SCATTER(T, count) \ - extern "C" __global__ __aicore__ void scatter_custom(GM_ADDR srcGm, GM_ADDR dstOffsetGm, \ - GM_ADDR dstGm, GM_ADDR workspace, GM_ADDR tiling) \ - { \ - ScatterTest op; \ - op.Init(srcGm, dstOffsetGm, dstGm, count); \ - op.Process(); \ +#define KERNEL_SCATTER(T, count) \ + extern "C" __global__ __aicore__ void scatter_custom(GM_ADDR 
srcGm, GM_ADDR dstOffsetGm, GM_ADDR dstGm) \ + { \ + KernelScatter op; \ + op.Init(srcGm, dstOffsetGm, dstGm, count); \ + op.Process(); \ } KERNEL_SCATTER(half, 128); -#ifndef ASCENDC_CPU_DEBUG -// call of kernel function -void Scatter_custom_do(uint32_t blockDim, void *l2ctrl, void *stream, uint8_t *srcGm, uint8_t *dstOffsetGm, uint8_t *dstGm, - uint8_t *workspace, uint8_t *tiling) -{ - scatter_custom<<>>(srcGm, dstOffsetGm, dstGm, workspace, tiling); -} -#endif \ No newline at end of file diff --git a/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/scripts/gen_data.py b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/scripts/gen_data.py new file mode 100644 index 000000000..f0442a917 --- /dev/null +++ b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/scripts/gen_data.py @@ -0,0 +1,27 @@ +#!/usr/bin/python3 +# coding=utf-8 +# ---------------------------------------------------------------------------------------------------------- +# Copyright (c) 2025 Huawei Technologies Co., Ltd. +# This program is free software, you can redistribute it and/or modify it under the terms and conditions of +# CANN Open Software License Agreement Version 2.0 (the "License"). +# Please refer to the License for details. You may not use this file except in compliance with the License. +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. +# See LICENSE in the root of the software repository for the full text of the License. +# ---------------------------------------------------------------------------------------------------------- + +import numpy as np + + +def gen_golden_data_simple(): + input_x = np.arange(128, dtype = np.float16) + input_y = np.arange(254, -1, -2, dtype = np.uint32) + golden = np.arange(127, -1, -1, dtype = np.float16) + + input_x.tofile("./input/input_x.bin") + input_y.tofile("./input/input_y.bin") + golden.tofile("./output/golden.bin") + + +if __name__ == "__main__": + gen_golden_data_simple() diff --git a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/scripts/verify_result.py b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/scripts/verify_result.py similarity index 61% rename from operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/scripts/verify_result.py rename to operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/scripts/verify_result.py index 2dd46f803..929348da7 100644 --- a/operator/ascendc/0_introduction/28_scatter_frameworklaunch/AclNNInvocation/scripts/verify_result.py +++ b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/scripts/verify_result.py @@ -1,12 +1,14 @@ #!/usr/bin/python3 # coding=utf-8 -# -# Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# =============================================================================== +# ---------------------------------------------------------------------------------------------------------- +# Copyright (c) 2025 Huawei Technologies Co., Ltd. 
+# This program is free software, you can redistribute it and/or modify it under the terms and conditions of
+# CANN Open Software License Agreement Version 2.0 (the "License").
+# Please refer to the License for details. You may not use this file except in compliance with the License.
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
+# INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+# See LICENSE in the root of the software repository for the full text of the License.
+# ----------------------------------------------------------------------------------------------------------
 import sys
 import numpy as np
@@ -37,7 +39,7 @@ def verify_result(output, golden):
         if index == 100:
             break
     error_ratio = float(different_element_indexes.size) / golden.size
-    print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol))
+    print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol))
     return error_ratio <= error_tol
diff --git a/operator/ascendc/0_introduction/README.md b/operator/ascendc/0_introduction/README.md
index 3209760d6..25af7ca8a 100644
--- a/operator/ascendc/0_introduction/README.md
+++ b/operator/ascendc/0_introduction/README.md
@@ -42,7 +42,7 @@
 | [25_simple_add](./25_simple_add) | Ascend C异构混合编程样例, 实现Add自定义Vector算子及调用, 支持host/device代码混合编程 | Atlas A2训练系列产品/Atlas 800I A2推理产品
 | [26_simple_matmulleakyrelu](./26_simple_matmulleakyrelu) | Ascend C异构混合编程样例, 实现MatmulLeakyRelu自定义Cube+Vector算子及调用, 支持host/device代码混合编程 | Atlas A2训练系列产品/Atlas 800I A2推理产品
 | [27_simple_add_cpp_extensions](./27_simple_add_cpp_extensions) | Ascend C异构混合编程样例, 实现Add自定义Vector算子动态库及pybind调用, 支持host/device代码混合编程 | Atlas A2训练系列产品/Atlas 800I A2推理产品
-| [28_Scatter_frameworklaunch](./28_Scatter_frameworklaunch) | 基于Ascend C的Scatter算子及FrameworkLaunch调用样例 | Atlas 训练系列产品<br>Atlas 推理系列产品AICore<br>Atlas A2训练系列产品/Atlas A2推理产品|
+| [28_Scatter_kernellaunch](./28_Scatter_kernellaunch) | 基于Ascend C的Scatter算子及FrameworkLaunch调用样例 | Atlas 训练系列产品<br>Atlas 推理系列产品AICore<br>Atlas A2训练系列产品/Atlas A2推理产品|
 ## 获取样例代码
 可以使用以下两种方式下载,请选择其中一种进行源码准备。
-- 
Gitee

From 8e44b434f761f7e2eed85c4dd0e2a12404d47d57 Mon Sep 17 00:00:00 2001
From: Zql99
Date: Wed, 19 Nov 2025 11:48:23 +0800
Subject: [PATCH 3/3] add Scatter sample

---
 .../ScatterKernelInvocationAcl/README.md  |  2 +-
 .../ScatterKernelInvocationAcl/main.cpp   | 23 ++++---------------
 .../scatter_custom.cpp                    |  7 ++++++
 operator/ascendc/0_introduction/README.md |  2 +-
 4 files changed, 13 insertions(+), 21 deletions(-)

diff --git a/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/README.md b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/README.md
index 02e7bc9bf..27e7b31ff 100644
--- a/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/README.md
+++ b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/README.md
@@ -15,7 +15,7 @@
 │   └── run.sh                    // 编译运行算子的脚本
 ```
 ## 代码实现介绍
-本调用样例中实现的是1971对于Scatter功能变换的兼容样例。
+本调用样例中实现的是对于Scatter功能变换的兼容样例,Atlas A2训练系列产品/Atlas A2推理系列产品不支持Scatter指令,使用标量搬出的方式进行兼容。
 Scatter计算逻辑是:给定一个连续的输入张量和一个目的地址偏移张量,Scatter指令根据偏移地址生成新的结果张量后将输入张量分散到结果张量中。
 兼容Scatter算子逻辑是:对于部分有规律的离散计算,可以通过Loop循环搬出的方式来提升效率,对于完全离散的场景。只能通过标量搬出的方式进行处理。
diff --git a/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/main.cpp b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/main.cpp
index c9aaaba9d..2b63ade09 100644
--- a/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/main.cpp
+++ b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/main.cpp
@@ -12,9 +12,10 @@
 #include "data_utils.h"
 #ifndef ASCENDC_CPU_DEBUG
 #include "acl/acl.h"
+extern void scatter_custom_do(uint32_t blockDim, void *stream, uint8_t *x, uint8_t *y, uint8_t *z);
 #else
 #include "tikicpulib.h"
-extern "C" __global__ __aicore__ void scatter_custom(GM_ADDR srcGm, GM_ADDR dstOffsetGm, GM_ADDR dstGm);
+extern "C" __global__ __aicore__ void scatter_custom(GM_ADDR x, GM_ADDR y, GM_ADDR z);
 #endif
 
 int32_t main(int32_t argc, char *argv[])
@@ -63,28 +64,12 @@ int32_t main(int32_t argc, char *argv[])
     CHECK_ACL(aclrtMemcpy(xDevice, inputByteSize, xHost, inputByteSize, ACL_MEMCPY_HOST_TO_DEVICE));
     CHECK_ACL(aclrtMemcpy(yDevice, inputByteSize1, yHost, inputByteSize1, ACL_MEMCPY_HOST_TO_DEVICE));
 
-    aclrtBinHandle binHandle = nullptr;
-    aclrtFuncHandle funcHandle = nullptr;
-    aclrtArgsHandle argsHandle = nullptr;
-    aclrtParamHandle paramHandle = nullptr;
-
-    const char *filePath = "./out/fatbin/ascendc_kernels/ascendc_kernels.o";
-    CHECK_ACL(aclrtBinaryLoadFromFile(filePath, nullptr, &binHandle));
-    CHECK_ACL(aclrtBinaryGetFunction(binHandle, "scatter_custom", &funcHandle));
-    CHECK_ACL(aclrtKernelArgsInit(funcHandle, &argsHandle));
-
-    CHECK_ACL(aclrtKernelArgsAppend(argsHandle, (void **)&xDevice, sizeof(uintptr_t), &paramHandle));
-    CHECK_ACL(aclrtKernelArgsAppend(argsHandle, (void **)&yDevice, sizeof(uintptr_t), &paramHandle));
-    CHECK_ACL(aclrtKernelArgsAppend(argsHandle, (void **)&zDevice, sizeof(uintptr_t), &paramHandle));
-    CHECK_ACL(aclrtKernelArgsFinalize(argsHandle));
-
-    CHECK_ACL(aclrtLaunchKernelWithConfig(funcHandle, blockDim, stream, nullptr, argsHandle, nullptr));
+    scatter_custom_do(blockDim, stream, xDevice, yDevice, zDevice);
     CHECK_ACL(aclrtSynchronizeStream(stream));
 
     CHECK_ACL(aclrtMemcpy(zHost, outputByteSize, zDevice, outputByteSize, ACL_MEMCPY_DEVICE_TO_HOST));
     WriteFile("./output/output_z.bin", zHost, outputByteSize);
-    CHECK_ACL(aclrtBinaryUnLoad(binHandle));
     CHECK_ACL(aclrtFree(xDevice));
     CHECK_ACL(aclrtFree(yDevice));
     CHECK_ACL(aclrtFree(zDevice));
     CHECK_ACL(aclrtFreeHost(xHost));
     CHECK_ACL(aclrtFreeHost(yHost));
     CHECK_ACL(aclrtFreeHost(zHost));
@@ -97,4 +82,4 @@
     CHECK_ACL(aclrtDestroyStream(stream));
     CHECK_ACL(aclrtResetDevice(deviceId));
     CHECK_ACL(aclFinalize());
 #endif
     return 0;
-}
+}
\ No newline at end of file
diff --git a/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/scatter_custom.cpp b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/scatter_custom.cpp
index 72c85d8af..bf4330024 100644
--- a/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/scatter_custom.cpp
+++ b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/scatter_custom.cpp
@@ -83,3 +83,10 @@
 }
 
 KERNEL_SCATTER(half, 128);
+
+#ifndef ASCENDC_CPU_DEBUG
+void scatter_custom_do(uint32_t blockDim, void *stream, uint8_t *x, uint8_t *y, uint8_t *z)
+{
+    scatter_custom<<<blockDim, nullptr, stream>>>(x, y, z);
+}
+#endif
\ No newline at end of file
diff --git a/operator/ascendc/0_introduction/README.md b/operator/ascendc/0_introduction/README.md
index 25af7ca8a..2651691d4 100644
--- a/operator/ascendc/0_introduction/README.md
+++ b/operator/ascendc/0_introduction/README.md
@@ -42,7 +42,7 @@
 | [25_simple_add](./25_simple_add) | Ascend C异构混合编程样例, 实现Add自定义Vector算子及调用, 支持host/device代码混合编程 | Atlas A2训练系列产品/Atlas 800I A2推理产品
 | [26_simple_matmulleakyrelu](./26_simple_matmulleakyrelu) | Ascend C异构混合编程样例, 实现MatmulLeakyRelu自定义Cube+Vector算子及调用, 支持host/device代码混合编程 | Atlas A2训练系列产品/Atlas 800I A2推理产品
 | [27_simple_add_cpp_extensions](./27_simple_add_cpp_extensions) | Ascend C异构混合编程样例, 实现Add自定义Vector算子动态库及pybind调用, 支持host/device代码混合编程 | Atlas A2训练系列产品/Atlas 800I A2推理产品
-| [28_Scatter_kernellaunch](./28_Scatter_kernellaunch) | 基于Ascend C的Scatter算子及FrameworkLaunch调用样例 | Atlas 训练系列产品<br>Atlas 推理系列产品AICore<br>Atlas A2训练系列产品/Atlas A2推理产品|
+| [28_Scatter_kernellaunch](./28_Scatter_kernellaunch) | Ascend C的兼容Scatter算子及Kernel直调样例 | Atlas A2训练系列产品/Atlas A2推理产品
 ## 获取样例代码
 可以使用以下两种方式下载,请选择其中一种进行源码准备。
-- 
Gitee