diff --git a/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/CMakeLists.txt b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..c8026efb8c746ef00992690e5be6a1c460f113d4 --- /dev/null +++ b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/CMakeLists.txt @@ -0,0 +1,44 @@ +cmake_minimum_required(VERSION 3.16) +project(Ascend_c) + +set(RUN_MODE "npu" CACHE STRING "cpu/sim/npu") +set(SOC_VERSION "Ascend310P3" CACHE STRING "system on chip type") +set(ASCEND_CANN_PACKAGE_PATH "/usr/local/Ascend/ascend-toolkit/latest" + CACHE STRING "ASCEND CANN package installation directory" +) +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "Build type Release/Debug (default Debug)" FORCE) +endif() +if(CMAKE_INSTALL_PREFIX STREQUAL /usr/local) + set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/out" CACHE STRING "path for install()" FORCE) +endif() + +file(GLOB KERNEL_FILES ${CMAKE_CURRENT_SOURCE_DIR}/scatter_custom.cpp) + +if("${RUN_MODE}" STREQUAL "cpu") + include(cmake/cpu_lib.cmake) +elseif("${RUN_MODE}" STREQUAL "sim" OR "${RUN_MODE}" STREQUAL "npu") + include(cmake/npu_lib.cmake) +else() + message("invalid RUN_MODE: ${RUN_MODE}") +endif() +add_executable(ascendc_kernels_bbit ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp) + +target_compile_options(ascendc_kernels_bbit PRIVATE + $:-g>> + -O2 -std=c++17 -D_GLIBCXX_USE_CXX11_ABI=0 -Wall -Werror +) + +target_link_libraries(ascendc_kernels_bbit PRIVATE + $:host_intf_pub>> + $:tikicpulib::${SOC_VERSION}>> + ascendcl + $:c_sec>> + $:ascendc_kernels>> +) + +install(TARGETS ascendc_kernels_bbit + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} +) diff --git a/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/README.md b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/README.md new file mode 100644 index 0000000000000000000000000000000000000000..27e7b31ff1ac39583e47fd5469b01dceb98abf1f --- /dev/null +++ b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/README.md @@ -0,0 +1,72 @@ +## 概述 +本样例介绍兼容Scatter算子实现及核函数直调方法。 + +## 目录结构介绍 +``` +├── ScatterKernelInvocationAcl +│ ├── cmake // 编译工程文件 +│ ├── scripts +│ │ ├── gen_data.py // 输入数据和真值数据生成脚本 +│ │ └── verify_result.py // 验证输出数据和真值数据是否一致的验证脚本 +│ ├── scatter_custom.cpp // 算子kernel实现 +│ ├── CMakeLists.txt // 编译工程文件 +│ ├── data_utils.h // 数据读入写出函数 +│ ├── main.cpp // 主函数,调用算子的应用程序,含CPU域及NPU域调用 +│ └── run.sh // 编译运行算子的脚本 +``` +## 代码实现介绍 +本调用样例中实现的是对于Scatter功能变换的兼容样例,Atlas A2训练系列产品/Atlas A2推理系列产品不支持Scatter指令,使用标量搬出的方式进行兼容。 + + Scatter计算逻辑是:给定一个连续的输入张量和一个目的地址偏移张量,Scatter指令根据偏移地址生成新的结果张量后将输入张量分散到结果张量中。 + 兼容Scatter算子逻辑是:对于部分有规律的离散计算,可以通过Loop循环搬出的方式来提升效率,对于完全离散的场景。只能通过标量搬出的方式进行处理。 + + Scatter兼容样例的实现流程分为3个基本任务:CopyIn,Compute,CopyOut。CopyIn任务负责将Global Memory上的输入Tensor srcGm和dstGm搬运到Local Memory,分别存储在xLocal、yLocal,Compute任务负责对xLocal、yLocal进行标量计算,计算结果存储在zLocal中,CopyOut任务负责将输出数据从zLocal搬运至Global Memory上的输出Tensor dstGm中。具体请参考[scatter_custom.cpp](./scatter_custom.cpp)。 + +- 调用实现 + 1. CPU侧运行验证主要通过ICPU_RUN_KF CPU调测宏等CPU调测库提供的接口来完成; + 2. NPU侧运行验证主要通过使用aclrtLaunchKernelWithConfig函数调用来完成。 + + 应用程序通过ASCENDC_CPU_DEBUG 宏区分代码逻辑运行于CPU侧还是NPU侧。 + +## 运行样例算子 + - 打开样例目录 + 以命令行方式下载样例代码,master分支为例。 + ```bash + cd ${git_clone_path}/samples/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl + ``` + - 配置环境变量 + + 请根据当前环境上CANN开发套件包的[安装方式](https://hiascend.com/document/redirect/CannCommunityInstSoftware),选择对应配置环境变量的命令。 + - 默认路径,root用户安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + ``` + - 默认路径,非root用户安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + ``` + - 指定路径install_path,安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=${install_path}/ascend-toolkit/latest + ``` + + - 样例执行 + + ```bash + bash run.sh -r [RUN_MODE] -v [SOC_VERSION] + ``` + - RUN_MODE:编译方式,可选择CPU调试,NPU上板。支持参数为[cpu / npu] + - SOC_VERSION:昇腾AI处理器型号,如果无法确定具体的[SOC_VERSION],则在安装昇腾AI处理器的服务器执行npu-smi info命令进行查询,在查询到的“Name”前增加Ascend信息,例如“Name”对应取值为xxxyy,实际配置的[SOC_VERSION]值为Ascendxxxyy。支持以下产品型号: + - Atlas 训练系列产品 + - Atlas 推理系列产品AI Core + - Atlas A2训练系列产品/Atlas 800I A2推理产品 + - Atlas 200/500 A2推理产品 + + 示例如下,Ascendxxxyy请替换为实际的AI处理器型号。 + ```bash + bash run.sh -r cpu -v Ascendxxxyy + ``` +## 更新说明 +| 时间 | 更新事项 | +| ---------- | ------------ | +| 2025/06/05 | 新增本readme | \ No newline at end of file diff --git a/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/cmake/cpu_lib.cmake b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/cmake/cpu_lib.cmake new file mode 100644 index 0000000000000000000000000000000000000000..751a119411509a4eeec79b76a875776206daeaf6 --- /dev/null +++ b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/cmake/cpu_lib.cmake @@ -0,0 +1,9 @@ +if(NOT DEFINED ENV{CMAKE_PREFIX_PATH}) + set(CMAKE_PREFIX_PATH ${ASCEND_CANN_PACKAGE_PATH}/tools/tikicpulib/lib/cmake) +endif() +find_package(tikicpulib REQUIRED) + +add_library(ascendc_kernels SHARED ${KERNEL_FILES}) +target_link_libraries(ascendc_kernels PUBLIC tikicpulib::${SOC_VERSION}) +target_compile_options(ascendc_kernels PRIVATE -g -O0 -std=c++17) +install(TARGETS ascendc_kernels DESTINATION ${CMAKE_INSTALL_LIBDIR}) diff --git a/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/cmake/npu_lib.cmake b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/cmake/npu_lib.cmake new file mode 100644 index 0000000000000000000000000000000000000000..d862f006417dcb8cf30cb8c33f293c2f869ff6e1 --- /dev/null +++ b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/cmake/npu_lib.cmake @@ -0,0 +1,10 @@ +if(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake) + set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake) +elseif(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/tools/tikcpp/ascendc_kernel_cmake) + set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/tools/tikcpp/ascendc_kernel_cmake) +else() + message(FATAL_ERROR "ascendc_kernel_cmake does not exist ,please check whether the cann package is installed") +endif() +include(${ASCENDC_CMAKE_DIR}/ascendc.cmake) + +ascendc_fatbin_library(ascendc_kernels ${KERNEL_FILES}) diff --git a/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/data_utils.h b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/data_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..82c2c743c5500569c8dc56800c263565044ea90d --- /dev/null +++ b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/data_utils.h @@ -0,0 +1,242 @@ +/** + * @file data_utils.h + * +* Copyright (c) 2025 Huawei Technologies Co., Ltd. +* This program is free software, you can redistribute it and/or modify it under the terms and conditions of +* CANN Open Software License Agreement Version 2.0 (the "License"). +* Please refer to the License for details. You may not use this file except in compliance with the License. +* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, +* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. +* See LICENSE in the root of the software repository for the full text of the License. +*/ +#ifndef DATA_UTILS_H +#define DATA_UTILS_H +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "acl/acl.h" + +typedef enum { + DT_UNDEFINED = -1, + FLOAT = 0, + HALF = 1, + INT8_T = 2, + INT32_T = 3, + UINT8_T = 4, + INT16_T = 6, + UINT16_T = 7, + UINT32_T = 8, + INT64_T = 9, + UINT64_T = 10, + DOUBLE = 11, + BOOL = 12, + STRING = 13, + COMPLEX64 = 16, + COMPLEX128 = 17, + BF16 = 27 +} printDataType; + +#define INFO_LOG(fmt, args...) fprintf(stdout, "[INFO] " fmt "\n", ##args) +#define WARN_LOG(fmt, args...) fprintf(stdout, "[WARN] " fmt "\n", ##args) +#define ERROR_LOG(fmt, args...) fprintf(stdout, "[ERROR] " fmt "\n", ##args) +#define CHECK_ACL(x) \ + do { \ + aclError __ret = x; \ + if (__ret != ACL_ERROR_NONE) { \ + std::cerr << __FILE__ << ":" << __LINE__ << " aclError:" << __ret << std::endl; \ + } \ + } while (0); + +/** + * @brief Read data from file + * @param [in] filePath: file path + * @param [out] fileSize: file size + * @return read result + */ +bool ReadFile(const std::string &filePath, size_t &fileSize, void *buffer, size_t bufferSize) +{ + struct stat sBuf; + int fileStatus = stat(filePath.data(), &sBuf); + if (fileStatus == -1) { + ERROR_LOG("failed to get file"); + return false; + } + if (S_ISREG(sBuf.st_mode) == 0) { + ERROR_LOG("%s is not a file, please enter a file", filePath.c_str()); + return false; + } + + std::ifstream file; + file.open(filePath, std::ios::binary); + if (!file.is_open()) { + ERROR_LOG("Open file failed. path = %s", filePath.c_str()); + return false; + } + + std::filebuf *buf = file.rdbuf(); + size_t size = buf->pubseekoff(0, std::ios::end, std::ios::in); + if (size == 0) { + ERROR_LOG("file size is 0"); + file.close(); + return false; + } + if (size > bufferSize) { + ERROR_LOG("file size is larger than buffer size"); + file.close(); + return false; + } + buf->pubseekpos(0, std::ios::in); + buf->sgetn(static_cast(buffer), size); + fileSize = size; + file.close(); + return true; +} + +/** + * @brief Write data to file + * @param [in] filePath: file path + * @param [in] buffer: data to write to file + * @param [in] size: size to write + * @return write result + */ +bool WriteFile(const std::string &filePath, const void *buffer, size_t size) +{ + if (buffer == nullptr) { + ERROR_LOG("Write file failed. buffer is nullptr"); + return false; + } + + int fd = open(filePath.c_str(), O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWRITE); + if (fd < 0) { + ERROR_LOG("Open file failed. path = %s", filePath.c_str()); + return false; + } + + size_t writeSize = write(fd, buffer, size); + (void)close(fd); + if (writeSize != size) { + ERROR_LOG("Write file Failed."); + return false; + } + + return true; +} + +/** + * @brief Reads a binary file into memory. + * + * This function opens a binary file, reads its contents into a dynamically allocated memory buffer, + * and returns a pointer to the buffer and the size of the file through output parameters. + * + * @param filePath The path to the binary file to be read. + * @param outBuffer A reference to a unique pointer that will hold the file data. + * @param outSize A reference to a size_t that will hold the size of the file. + * @return true if the file was read successfully, false otherwise. + */ +bool ReadBinaryFile(const char *filePath, std::unique_ptr &outBuffer, size_t &outSize) +{ + FILE *file = fopen(filePath, "rb"); + if (!file) { + ERROR_LOG("Error opening file: %s\n", strerror(errno)); + return false; + } + + fseek(file, 0, SEEK_END); + outSize = ftell(file); + rewind(file); + + outBuffer.reset(new char[outSize]); + if (fread(outBuffer.get(), 1, outSize, file) != outSize) { + ERROR_LOG("Error reading file.\n"); + fclose(file); + return false; + } + + fclose(file); + return true; +} + +template void DoPrintData(const T *data, size_t count, size_t elementsPerRow) +{ + assert(elementsPerRow != 0); + for (size_t i = 0; i < count; ++i) { + std::cout << std::setw(10) << data[i]; + if (i % elementsPerRow == elementsPerRow - 1) { + std::cout << std::endl; + } + } +} + +void DoPrintHalfData(const aclFloat16 *data, size_t count, size_t elementsPerRow) +{ + assert(elementsPerRow != 0); + for (size_t i = 0; i < count; ++i) { + std::cout << std::setw(10) << std::setprecision(6) << aclFloat16ToFloat(data[i]); + if (i % elementsPerRow == elementsPerRow - 1) { + std::cout << std::endl; + } + } +} + +void PrintData(const void *data, size_t count, printDataType dataType, size_t elementsPerRow = 16) +{ + if (data == nullptr) { + ERROR_LOG("Print data failed. data is nullptr"); + return; + } + + switch (dataType) { + case BOOL: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT8_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT8_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT16_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT16_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT32_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT32_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT64_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT64_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case HALF: + DoPrintHalfData(reinterpret_cast(data), count, elementsPerRow); + break; + case FLOAT: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case DOUBLE: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + default: + ERROR_LOG("Unsupported type: %d", dataType); + } + std::cout << std::endl; +} +#endif // DATA_UTILS_H diff --git a/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/main.cpp b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2b63ade095a43420565e58ebaceef9dd2e313702 --- /dev/null +++ b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/main.cpp @@ -0,0 +1,85 @@ +/** + * @file main.cpp + * +* Copyright (c) 2025 Huawei Technologies Co., Ltd. +* This program is free software, you can redistribute it and/or modify it under the terms and conditions of +* CANN Open Software License Agreement Version 2.0 (the "License"). +* Please refer to the License for details. You may not use this file except in compliance with the License. +* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, +* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. +* See LICENSE in the root of the software repository for the full text of the License. +*/ +#include "data_utils.h" +#ifndef ASCENDC_CPU_DEBUG +#include "acl/acl.h" +extern void scatter_custom_do(uint32_t blockDim, void *stream, uint8_t *x, uint8_t *y, uint8_t *z); +#else +#include "tikicpulib.h" +extern "C" __global__ __aicore__ void scatter_custom(GM_ADDR x, GM_ADDR y, GM_ADDR z); +#endif + +int32_t main(int32_t argc, char *argv[]) +{ + uint32_t blockDim = 1; + size_t inputByteSize = 128 * sizeof(uint16_t); + size_t inputByteSize1 = 128 * sizeof(uint32_t); + size_t outputByteSize = 128 * sizeof(uint16_t); + +#ifdef ASCENDC_CPU_DEBUG + uint8_t *x = (uint8_t *)AscendC::GmAlloc(inputByteSize); + uint8_t *y = (uint8_t *)AscendC::GmAlloc(inputByteSize1); + uint8_t *z = (uint8_t *)AscendC::GmAlloc(outputByteSize); + + ReadFile("./input/input_x.bin", inputByteSize, x, inputByteSize); + ReadFile("./input/input_y.bin", inputByteSize1, y, inputByteSize1); + + AscendC::SetKernelMode(KernelMode::AIV_MODE); + ICPU_RUN_KF(scatter_custom, blockDim, x, y, z); // use this macro for cpu debug + + WriteFile("./output/output_z.bin", z, outputByteSize); + + AscendC::GmFree((void *)x); + AscendC::GmFree((void *)y); + AscendC::GmFree((void *)z); +#else + CHECK_ACL(aclInit(nullptr)); + int32_t deviceId = 0; + CHECK_ACL(aclrtSetDevice(deviceId)); + aclrtStream stream = nullptr; + CHECK_ACL(aclrtCreateStream(&stream)); + + uint8_t *xHost, *yHost, *zHost; + uint8_t *xDevice, *yDevice, *zDevice; + + CHECK_ACL(aclrtMallocHost((void **)(&xHost), inputByteSize)); + CHECK_ACL(aclrtMallocHost((void **)(&yHost), inputByteSize1)); + CHECK_ACL(aclrtMallocHost((void **)(&zHost), outputByteSize)); + CHECK_ACL(aclrtMalloc((void **)&xDevice, inputByteSize, ACL_MEM_MALLOC_HUGE_FIRST)); + CHECK_ACL(aclrtMalloc((void **)&yDevice, inputByteSize1, ACL_MEM_MALLOC_HUGE_FIRST)); + CHECK_ACL(aclrtMalloc((void **)&zDevice, outputByteSize, ACL_MEM_MALLOC_HUGE_FIRST)); + + ReadFile("./input/input_x.bin", inputByteSize, xHost, inputByteSize); + ReadFile("./input/input_y.bin", inputByteSize1, yHost, inputByteSize1); + + CHECK_ACL(aclrtMemcpy(xDevice, inputByteSize, xHost, inputByteSize, ACL_MEMCPY_HOST_TO_DEVICE)); + CHECK_ACL(aclrtMemcpy(yDevice, inputByteSize1, yHost, inputByteSize1, ACL_MEMCPY_HOST_TO_DEVICE)); + + scatter_custom_do(blockDim, stream, xDevice, yDevice, zDevice); + CHECK_ACL(aclrtSynchronizeStream(stream)); + + CHECK_ACL(aclrtMemcpy(zHost, outputByteSize, zDevice, outputByteSize, ACL_MEMCPY_DEVICE_TO_HOST)); + WriteFile("./output/output_z.bin", zHost, outputByteSize); + + CHECK_ACL(aclrtFree(xDevice)); + CHECK_ACL(aclrtFree(yDevice)); + CHECK_ACL(aclrtFree(zDevice)); + CHECK_ACL(aclrtFreeHost(xHost)); + CHECK_ACL(aclrtFreeHost(yHost)); + CHECK_ACL(aclrtFreeHost(zHost)); + + CHECK_ACL(aclrtDestroyStream(stream)); + CHECK_ACL(aclrtResetDevice(deviceId)); + CHECK_ACL(aclFinalize()); +#endif + return 0; +} \ No newline at end of file diff --git a/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/run.sh b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/run.sh new file mode 100644 index 0000000000000000000000000000000000000000..eeff1b86f1804653a03da4d01c85ef66b6a8e4ab --- /dev/null +++ b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/run.sh @@ -0,0 +1,122 @@ +#!/bin/bash +# ---------------------------------------------------------------------------------------------------------- +# Copyright (c) 2025 Huawei Technologies Co., Ltd. +# This program is free software, you can redistribute it and/or modify it under the terms and conditions of +# CANN Open Software License Agreement Version 2.0 (the "License"). +# Please refer to the License for details. You may not use this file except in compliance with the License. +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. +# See LICENSE in the root of the software repository for the full text of the License. +# ---------------------------------------------------------------------------------------------------------- +CURRENT_DIR=$( + cd $(dirname ${BASH_SOURCE:-$0}) + pwd +) +cd $CURRENT_DIR + +BUILD_TYPE="Debug" +INSTALL_PREFIX="${CURRENT_DIR}/out" + +SHORT=r:,v:,i:,b:,p:, +LONG=run-mode:,soc-version:,install-path:,build-type:,install-prefix:, +OPTS=$(getopt -a --options $SHORT --longoptions $LONG -- "$@") +eval set -- "$OPTS" + +while :; do + case "$1" in + -r | --run-mode) + RUN_MODE="$2" + shift 2 + ;; + -v | --soc-version) + SOC_VERSION="$2" + shift 2 + ;; + -i | --install-path) + ASCEND_INSTALL_PATH="$2" + shift 2 + ;; + -b | --build-type) + BUILD_TYPE="$2" + shift 2 + ;; + -p | --install-prefix) + INSTALL_PREFIX="$2" + shift 2 + ;; + --) + shift + break + ;; + *) + echo "[ERROR]: Unexpected option: $1" + break + ;; + esac +done + +RUN_MODE_LIST="cpu npu" +if [[ " $RUN_MODE_LIST " != *" $RUN_MODE "* ]]; then + echo "[ERROR]: RUN_MODE error, This sample only support specify cpu or npu!" + exit -1 +fi + +VERSION_LIST="Ascend910A Ascend910B Ascend310B1 Ascend310B2 Ascend310B3 Ascend310B4 Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" +if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" + exit -1 +fi + +if [ -n "$ASCEND_INSTALL_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH +elif [ -n "$ASCEND_HOME_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH +else + if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then + _ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + else + _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + fi +fi + +export ASCEND_TOOLKIT_HOME=${_ASCEND_INSTALL_PATH} +export ASCEND_HOME_PATH=${_ASCEND_INSTALL_PATH} +echo "[INFO]: Current compile soc version is ${SOC_VERSION}" +source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash +if [ "${RUN_MODE}" = "cpu" ]; then + export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH +fi + +set -e +rm -rf build out +mkdir -p build +cmake -B build \ + -DRUN_MODE=${RUN_MODE} \ + -DSOC_VERSION=${SOC_VERSION} \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ + -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \ + -DASCEND_CANN_PACKAGE_PATH=${_ASCEND_INSTALL_PATH} +cmake --build build -j +cmake --install build + +rm -f ascendc_kernels_bbit +cp ./out/bin/ascendc_kernels_bbit ./ +rm -rf input output +mkdir -p input output +python3 scripts/gen_data.py +( + export LD_LIBRARY_PATH=$(pwd)/out/lib:$(pwd)/out/lib64:${_ASCEND_INSTALL_PATH}/lib64:$LD_LIBRARY_PATH + if [[ "$RUN_WITH_TOOLCHAIN" -eq 1 ]]; then + if [ "${RUN_MODE}" = "npu" ]; then + msprof op --application=./ascendc_kernels_bbit + elif [ "${RUN_MODE}" = "sim" ]; then + msprof op simulator --application=./ascendc_kernels_bbit + elif [ "${RUN_MODE}" = "cpu" ]; then + ./ascendc_kernels_bbit + fi + else + ./ascendc_kernels_bbit + fi +) +md5sum output/*.bin +python3 scripts/verify_result.py output/output_z.bin output/golden.bin diff --git a/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/scatter_custom.cpp b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/scatter_custom.cpp new file mode 100644 index 0000000000000000000000000000000000000000..bf4330024a45106f31e65111ecdf6b1045192171 --- /dev/null +++ b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/scatter_custom.cpp @@ -0,0 +1,92 @@ +/** + * @file add_custom.cpp + * +* Copyright (c) 2025 Huawei Technologies Co., Ltd. +* This program is free software, you can redistribute it and/or modify it under the terms and conditions of +* CANN Open Software License Agreement Version 2.0 (the "License"). +* Please refer to the License for details. You may not use this file except in compliance with the License. +* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, +* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. +* See LICENSE in the root of the software repository for the full text of the License. +*/ +#include "kernel_operator.h" + +constexpr int32_t BUFFER_NUM = 1; // tensor num for each queue + +class KernelScatter { +public: + __aicore__ inline KernelScatter() {} + __aicore__ inline void Init(GM_ADDR srcGm, GM_ADDR dstOffsetGm, GM_ADDR dstGm, uint32_t count) + { + mElementCount = count; + xGm.SetGlobalBuffer((__gm__ half *)srcGm, mElementCount); + yGm.SetGlobalBuffer((__gm__ uint32_t *)dstOffsetGm, mElementCount); + zGm.SetGlobalBuffer((__gm__ half *)dstGm, mElementCount); + pipe.InitBuffer(inQueueX, BUFFER_NUM, mElementCount * sizeof(half)); + pipe.InitBuffer(inQueueY, BUFFER_NUM, mElementCount * sizeof(uint32_t)); + pipe.InitBuffer(outQueueZ, BUFFER_NUM, mElementCount * sizeof(half)); + } + __aicore__ inline void Process() + { + CopyIn(); + Compute(); + CopyOut(); + } + +private: + __aicore__ inline void CopyIn() + { + AscendC::LocalTensor xLocal = inQueueX.AllocTensor(); + AscendC::LocalTensor yLocal = inQueueY.AllocTensor(); + AscendC::DataCopy(xLocal, xGm, mElementCount); + AscendC::DataCopy(yLocal, yGm, mElementCount); + inQueueX.EnQue(xLocal); + inQueueY.EnQue(yLocal); + } + __aicore__ inline void Compute() + { + AscendC::LocalTensor xLocal = inQueueX.DeQue(); + AscendC::LocalTensor yLocal = inQueueY.DeQue(); + AscendC::LocalTensor zLocal = outQueueZ.AllocTensor(); + for (uint32_t i = 0; i < mElementCount; ++i) { + auto offset = yLocal.GetValue(i) / sizeof(T); + auto srcValue = xLocal.GetValue(i); + zLocal.SetValue(offset, srcValue); + } + outQueueZ.EnQue(zLocal); + inQueueX.FreeTensor(xLocal); + inQueueY.FreeTensor(yLocal); + } + __aicore__ inline void CopyOut() + { + AscendC::LocalTensor zLocal = outQueueZ.DeQue(); + AscendC::DataCopy(zGm, zLocal, mElementCount); + outQueueZ.FreeTensor(zLocal); + } + +private: + AscendC::TPipe pipe; + AscendC::TQue inQueueX, inQueueY; + AscendC::TQue outQueueZ; + AscendC::GlobalTensor xGm; + AscendC::GlobalTensor yGm; + AscendC::GlobalTensor zGm; + uint32_t mElementCount; +}; + +#define KERNEL_SCATTER(T, count) \ + extern "C" __global__ __aicore__ void scatter_custom(GM_ADDR srcGm, GM_ADDR dstOffsetGm, GM_ADDR dstGm) \ + { \ + KernelScatter op; \ + op.Init(srcGm, dstOffsetGm, dstGm, count); \ + op.Process(); \ + } + +KERNEL_SCATTER(half, 128); + +#ifndef ASCENDC_CPU_DEBUG +void scatter_custom_do(uint32_t blockDim, void *stream, uint8_t *x, uint8_t *y, uint8_t *z) +{ + scatter_custom<<>>(x, y, z); +} +#endif \ No newline at end of file diff --git a/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/scripts/gen_data.py b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/scripts/gen_data.py new file mode 100644 index 0000000000000000000000000000000000000000..f0442a91762af2e9d39ac51bbac208b403e0a00e --- /dev/null +++ b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/scripts/gen_data.py @@ -0,0 +1,27 @@ +#!/usr/bin/python3 +# coding=utf-8 +# ---------------------------------------------------------------------------------------------------------- +# Copyright (c) 2025 Huawei Technologies Co., Ltd. +# This program is free software, you can redistribute it and/or modify it under the terms and conditions of +# CANN Open Software License Agreement Version 2.0 (the "License"). +# Please refer to the License for details. You may not use this file except in compliance with the License. +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. +# See LICENSE in the root of the software repository for the full text of the License. +# ---------------------------------------------------------------------------------------------------------- + +import numpy as np + + +def gen_golden_data_simple(): + input_x = np.arange(128, dtype = np.float16) + input_y = np.arange(254, -1, -2, dtype = np.uint32) + golden = np.arange(127, -1, -1, dtype = np.float16) + + input_x.tofile("./input/input_x.bin") + input_y.tofile("./input/input_y.bin") + golden.tofile("./output/golden.bin") + + +if __name__ == "__main__": + gen_golden_data_simple() diff --git a/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/scripts/verify_result.py b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/scripts/verify_result.py new file mode 100644 index 0000000000000000000000000000000000000000..929348da7b0b26f4c0ca37f9cf181e3b710a0901 --- /dev/null +++ b/operator/ascendc/0_introduction/28_scatter_kernellaunch/ScatterKernelInvocationAcl/scripts/verify_result.py @@ -0,0 +1,55 @@ +#!/usr/bin/python3 +# coding=utf-8 +# ---------------------------------------------------------------------------------------------------------- +# Copyright (c) 2025 Huawei Technologies Co., Ltd. +# This program is free software, you can redistribute it and/or modify it under the terms and conditions of +# CANN Open Software License Agreement Version 2.0 (the "License"). +# Please refer to the License for details. You may not use this file except in compliance with the License. +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. +# See LICENSE in the root of the software repository for the full text of the License. +# ---------------------------------------------------------------------------------------------------------- + +import sys +import numpy as np + +# for float16 +relative_tol = 1e-3 +absolute_tol = 1e-5 +error_tol = 1e-3 + + +def verify_result(output, golden): + output = np.fromfile(output, dtype=np.float16).reshape(-1) + golden = np.fromfile(golden, dtype=np.float16).reshape(-1) + different_element_results = np.isclose(output, + golden, + rtol=relative_tol, + atol=absolute_tol, + equal_nan=True) + different_element_indexes = np.where(different_element_results == False)[0] + for index in range(len(different_element_indexes)): + real_index = different_element_indexes[index] + golden_data = golden[real_index] + output_data = output[real_index] + print( + "data index: %06d, expected: %-.9f, actual: %-.9f, rdiff: %-.6f" % + (real_index, golden_data, output_data, + abs(output_data - golden_data) / golden_data)) + if index == 100: + break + error_ratio = float(different_element_indexes.size) / golden.size + print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + return error_ratio <= error_tol + + +if __name__ == '__main__': + try: + res = verify_result(sys.argv[1], sys.argv[2]) + if not res: + raise ValueError("[ERROR] result error") + else: + print("test pass") + except Exception as e: + print(e) + sys.exit(1) diff --git a/operator/ascendc/0_introduction/README.md b/operator/ascendc/0_introduction/README.md index 0b9383e71f39813dacd1faf17c7f241247515b64..2651691d489a47bd8b815cf26b317f762fd56e17 100644 --- a/operator/ascendc/0_introduction/README.md +++ b/operator/ascendc/0_introduction/README.md @@ -42,6 +42,7 @@ | [25_simple_add](./25_simple_add) | Ascend C异构混合编程样例, 实现Add自定义Vector算子及调用, 支持host/device代码混合编程 | Atlas A2训练系列产品/Atlas 800I A2推理产品 | [26_simple_matmulleakyrelu](./26_simple_matmulleakyrelu) | Ascend C异构混合编程样例, 实现MatmulLeakyRelu自定义Cube+Vector算子及调用, 支持host/device代码混合编程 | Atlas A2训练系列产品/Atlas 800I A2推理产品 | [27_simple_add_cpp_extensions](./27_simple_add_cpp_extensions) | Ascend C异构混合编程样例, 实现Add自定义Vector算子动态库及pybind调用, 支持host/device代码混合编程 | Atlas A2训练系列产品/Atlas 800I A2推理产品 +| [28_Scatter_kernellaunch](./28_Scatter_kernellaunch) | Ascend C的兼容Scatter算子及Kernel直调样例 | Atlas A2训练系列产品/Atlas A2推理产品 ## 获取样例代码 可以使用以下两种方式下载,请选择其中一种进行源码准备。